From ed6b242d8b1c867a23ba904138eed8733d7a8c02 Mon Sep 17 00:00:00 2001
From: jeremiah
Date: Thu, 18 Apr 2024 20:23:24 +0800
Subject: [PATCH] FEAT(loongarch): Support loongarch with 8u402
 --story=117047250

---
 .../autoconf/build-aux/autoconf-config.guess | 3 +
 common/autoconf/build-aux/autoconf-config.sub | 1 +
 common/autoconf/build-aux/config.guess | 11 +
 common/autoconf/configure.ac | 7 +
 common/autoconf/generated-configure.sh | 84 +
 common/autoconf/platform.m4 | 71 +
 common/autoconf/spec.gmk.in | 23 +
 hotspot/agent/make/saenv.sh | 14 +
 .../agent/src/os/linux/LinuxDebuggerLocal.c | 72 +-
 hotspot/agent/src/os/linux/Makefile | 10 +-
 hotspot/agent/src/os/linux/libproc.h | 15 +-
 hotspot/agent/src/os/linux/ps_proc.c | 8 +-
 .../classes/sun/jvm/hotspot/HotSpotAgent.java | 12 +
 .../sun/jvm/hotspot/asm/Disassembler.java | 6 +
 .../MachineDescriptionLOONGARCH64.java | 41 +
 .../debugger/MachineDescriptionMIPS64.java | 41 +
 .../debugger/linux/LinuxCDebugger.java | 18 +
 .../linux/LinuxThreadContextFactory.java | 6 +
 .../loongarch64/LinuxLOONGARCH64CFrame.java | 80 +
 .../LinuxLOONGARCH64ThreadContext.java | 47 +
 .../linux/mips64/LinuxMIPS64CFrame.java | 80 +
 .../mips64/LinuxMIPS64ThreadContext.java | 47 +
 .../loongarch64/LOONGARCH64ThreadContext.java | 123 +
 .../debugger/mips64/MIPS64ThreadContext.java | 123 +
 .../hotspot/debugger/posix/elf/ELFHeader.java | 2 +
 .../debugger/proc/ProcDebuggerLocal.java | 12 +
 .../loongarch64/ProcLOONGARCH64Thread.java | 92 +
 .../ProcLOONGARCH64ThreadContext.java | 47 +
 .../ProcLOONGARCH64ThreadFactory.java | 45 +
 .../proc/mips64/ProcMIPS64Thread.java | 92 +
 .../proc/mips64/ProcMIPS64ThreadContext.java | 47 +
 .../proc/mips64/ProcMIPS64ThreadFactory.java | 45 +
 .../debugger/remote/RemoteDebuggerClient.java | 12 +
 .../loongarch64/RemoteLOONGARCH64Thread.java | 54 +
 .../RemoteLOONGARCH64ThreadContext.java | 51 +
 .../RemoteLOONGARCH64ThreadFactory.java | 45 +
 .../remote/mips64/RemoteMIPS64Thread.java | 54 +
 .../mips64/RemoteMIPS64ThreadContext.java | 51 +
 .../mips64/RemoteMIPS64ThreadFactory.java | 45 +
 .../sun/jvm/hotspot/runtime/Threads.java | 6 +
 .../LinuxLOONGARCH64JavaThreadPDAccess.java | 133 +
 .../LinuxMIPS64JavaThreadPDAccess.java | 132 +
 .../LOONGARCH64CurrentFrameGuess.java | 217 +
 .../runtime/loongarch64/LOONGARCH64Frame.java | 534 +
 .../LOONGARCH64JavaCallWrapper.java | 57 +
 .../loongarch64/LOONGARCH64RegisterMap.java | 52 +
 .../mips64/MIPS64CurrentFrameGuess.java | 217 +
 .../hotspot/runtime/mips64/MIPS64Frame.java | 547 +
 .../runtime/mips64/MIPS64JavaCallWrapper.java | 57 +
 .../runtime/mips64/MIPS64RegisterMap.java | 52 +
 .../jvm/hotspot/utilities/PlatformInfo.java | 11 +
 hotspot/make/defs.make | 37 +-
 hotspot/make/linux/Makefile | 4 +
 hotspot/make/linux/makefiles/defs.make | 56 +
 hotspot/make/linux/makefiles/gcc.make | 11 +-
 hotspot/make/linux/makefiles/loongarch64.make | 43 +
 hotspot/make/linux/makefiles/mips64.make | 43 +
 hotspot/make/linux/makefiles/sa.make | 8 +
 hotspot/make/linux/makefiles/saproc.make | 12 +
 hotspot/make/linux/makefiles/sparcWorks.make | 7 +
 hotspot/make/linux/makefiles/vm.make | 28 +
 hotspot/make/linux/platform_loongarch64 | 17 +
 hotspot/make/linux/platform_mips64 | 17 +
 hotspot/make/sa.files | 16 +
 .../aarch64/vm/c1_LIRAssembler_aarch64.cpp | 17 +-
 .../aarch64/vm/c1_LIRGenerator_aarch64.cpp | 19 +-
 .../cpu/loongarch/vm/assembler_loongarch.cpp | 855 +
 .../cpu/loongarch/vm/assembler_loongarch.hpp | 2810 ++++
 .../vm/assembler_loongarch.inline.hpp | 33 +
 .../vm/bytecodeInterpreter_loongarch.hpp | 110 +
.../bytecodeInterpreter_loongarch.inline.hpp | 286 + .../cpu/loongarch/vm/bytecodes_loongarch.cpp | 38 + .../cpu/loongarch/vm/bytecodes_loongarch.hpp | 31 + .../src/cpu/loongarch/vm/bytes_loongarch.hpp | 75 + .../vm/c1_CodeStubs_loongarch_64.cpp | 387 + .../cpu/loongarch/vm/c1_Defs_loongarch.hpp | 79 + .../loongarch/vm/c1_FpuStackSim_loongarch.hpp | 32 + .../vm/c1_FpuStackSim_loongarch_64.cpp | 31 + .../loongarch/vm/c1_FrameMap_loongarch.hpp | 143 + .../loongarch/vm/c1_FrameMap_loongarch_64.cpp | 362 + .../vm/c1_LIRAssembler_loongarch.hpp | 83 + .../vm/c1_LIRAssembler_loongarch_64.cpp | 3377 ++++ .../vm/c1_LIRGenerator_loongarch_64.cpp | 1442 ++ .../loongarch/vm/c1_LinearScan_loongarch.hpp | 70 + .../vm/c1_LinearScan_loongarch_64.cpp | 33 + .../vm/c1_MacroAssembler_loongarch.hpp | 112 + .../vm/c1_MacroAssembler_loongarch_64.cpp | 346 + .../loongarch/vm/c1_Runtime1_loongarch_64.cpp | 1252 ++ .../cpu/loongarch/vm/c1_globals_loongarch.hpp | 69 + .../cpu/loongarch/vm/c2_globals_loongarch.hpp | 87 + .../cpu/loongarch/vm/c2_init_loongarch.cpp | 34 + .../cpu/loongarch/vm/codeBuffer_loongarch.hpp | 35 + .../cpu/loongarch/vm/compiledIC_loongarch.cpp | 167 + .../src/cpu/loongarch/vm/copy_loongarch.hpp | 90 + .../vm/cppInterpreterGenerator_loongarch.hpp | 53 + .../loongarch/vm/cppInterpreter_loongarch.cpp | 215 + .../src/cpu/loongarch/vm/debug_loongarch.cpp | 51 + .../cpu/loongarch/vm/depChecker_loongarch.cpp | 30 + .../cpu/loongarch/vm/depChecker_loongarch.hpp | 31 + .../loongarch/vm/disassembler_loongarch.hpp | 37 + .../src/cpu/loongarch/vm/frame_loongarch.cpp | 711 + .../src/cpu/loongarch/vm/frame_loongarch.hpp | 229 + .../loongarch/vm/frame_loongarch.inline.hpp | 312 + .../vm/globalDefinitions_loongarch.hpp | 41 + .../cpu/loongarch/vm/globals_loongarch.hpp | 103 + .../cpu/loongarch/vm/icBuffer_loongarch.cpp | 101 + .../src/cpu/loongarch/vm/icache_loongarch.cpp | 42 + .../src/cpu/loongarch/vm/icache_loongarch.hpp | 41 + .../loongarch/vm/interp_masm_loongarch_64.cpp | 1960 +++ .../loongarch/vm/interp_masm_loongarch_64.hpp | 269 + .../vm/interpreterGenerator_loongarch.hpp | 51 + .../loongarch/vm/interpreterRT_loongarch.hpp | 66 + .../vm/interpreterRT_loongarch_64.cpp | 274 + .../loongarch/vm/interpreter_loongarch.hpp | 50 + .../loongarch/vm/interpreter_loongarch_64.cpp | 277 + .../vm/javaFrameAnchor_loongarch.hpp | 87 + .../vm/jniFastGetField_loongarch_64.cpp | 169 + .../cpu/loongarch/vm/jniTypes_loongarch.hpp | 144 + hotspot/src/cpu/loongarch/vm/jni_loongarch.h | 51 + hotspot/src/cpu/loongarch/vm/loongarch.ad | 24 + hotspot/src/cpu/loongarch/vm/loongarch_64.ad | 12861 ++++++++++++++ .../loongarch/vm/macroAssembler_loongarch.cpp | 3895 +++++ .../loongarch/vm/macroAssembler_loongarch.hpp | 771 + .../vm/macroAssembler_loongarch.inline.hpp | 34 + .../vm/metaspaceShared_loongarch_64.cpp | 120 + .../loongarch/vm/methodHandles_loongarch.cpp | 566 + .../loongarch/vm/methodHandles_loongarch.hpp | 62 + .../cpu/loongarch/vm/nativeInst_loongarch.cpp | 485 + .../cpu/loongarch/vm/nativeInst_loongarch.hpp | 513 + .../loongarch/vm/registerMap_loongarch.hpp | 45 + .../vm/register_definitions_loongarch.cpp | 103 + .../cpu/loongarch/vm/register_loongarch.cpp | 59 + .../cpu/loongarch/vm/register_loongarch.hpp | 436 + .../cpu/loongarch/vm/relocInfo_loongarch.cpp | 130 + .../cpu/loongarch/vm/relocInfo_loongarch.hpp | 40 + .../cpu/loongarch/vm/runtime_loongarch_64.cpp | 199 + .../vm/sharedRuntime_loongarch_64.cpp | 3453 ++++ .../vm/stubGenerator_loongarch_64.cpp | 3445 ++++ .../vm/stubRoutines_loongarch_64.cpp | 264 + 
.../vm/stubRoutines_loongarch_64.hpp | 60 + ...templateInterpreterGenerator_loongarch.hpp | 35 + .../vm/templateInterpreter_loongarch.hpp | 41 + .../vm/templateInterpreter_loongarch_64.cpp | 2335 +++ .../vm/templateTable_loongarch_64.cpp | 4024 +++++ .../vm/templateTable_loongarch_64.hpp | 44 + .../cpu/loongarch/vm/vmStructs_loongarch.hpp | 68 + .../loongarch/vm/vm_version_ext_loongarch.cpp | 84 + .../loongarch/vm/vm_version_ext_loongarch.hpp | 54 + .../cpu/loongarch/vm/vm_version_loongarch.cpp | 443 + .../cpu/loongarch/vm/vm_version_loongarch.hpp | 299 + .../src/cpu/loongarch/vm/vmreg_loongarch.cpp | 51 + .../src/cpu/loongarch/vm/vmreg_loongarch.hpp | 35 + .../loongarch/vm/vmreg_loongarch.inline.hpp | 66 + .../loongarch/vm/vtableStubs_loongarch_64.cpp | 300 + hotspot/src/cpu/mips/vm/assembler_mips.cpp | 774 + hotspot/src/cpu/mips/vm/assembler_mips.hpp | 1789 ++ .../src/cpu/mips/vm/assembler_mips.inline.hpp | 33 + .../cpu/mips/vm/bytecodeInterpreter_mips.cpp | 53 + .../cpu/mips/vm/bytecodeInterpreter_mips.hpp | 110 + .../vm/bytecodeInterpreter_mips.inline.hpp | 286 + hotspot/src/cpu/mips/vm/bytecodes_mips.cpp | 38 + hotspot/src/cpu/mips/vm/bytecodes_mips.hpp | 31 + hotspot/src/cpu/mips/vm/bytes_mips.hpp | 193 + hotspot/src/cpu/mips/vm/c2_globals_mips.hpp | 100 + hotspot/src/cpu/mips/vm/c2_init_mips.cpp | 34 + hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp | 35 + hotspot/src/cpu/mips/vm/compiledIC_mips.cpp | 173 + hotspot/src/cpu/mips/vm/copy_mips.hpp | 90 + .../mips/vm/cppInterpreterGenerator_mips.hpp | 53 + .../src/cpu/mips/vm/cppInterpreter_mips.cpp | 215 + .../src/cpu/mips/vm/cppInterpreter_mips.hpp | 40 + hotspot/src/cpu/mips/vm/debug_mips.cpp | 51 + hotspot/src/cpu/mips/vm/depChecker_mips.cpp | 30 + hotspot/src/cpu/mips/vm/depChecker_mips.hpp | 31 + hotspot/src/cpu/mips/vm/disassembler_mips.hpp | 37 + hotspot/src/cpu/mips/vm/frame_mips.cpp | 711 + hotspot/src/cpu/mips/vm/frame_mips.hpp | 229 + hotspot/src/cpu/mips/vm/frame_mips.inline.hpp | 312 + .../cpu/mips/vm/globalDefinitions_mips.hpp | 41 + hotspot/src/cpu/mips/vm/globals_mips.hpp | 124 + hotspot/src/cpu/mips/vm/icBuffer_mips.cpp | 97 + hotspot/src/cpu/mips/vm/icache_mips.cpp | 41 + hotspot/src/cpu/mips/vm/icache_mips.hpp | 41 + .../src/cpu/mips/vm/interp_masm_mips_64.cpp | 2084 +++ .../src/cpu/mips/vm/interp_masm_mips_64.hpp | 269 + .../cpu/mips/vm/interpreterGenerator_mips.hpp | 49 + .../src/cpu/mips/vm/interpreterRT_mips.hpp | 61 + .../src/cpu/mips/vm/interpreterRT_mips_64.cpp | 259 + hotspot/src/cpu/mips/vm/interpreter_mips.hpp | 50 + .../src/cpu/mips/vm/interpreter_mips_64.cpp | 286 + .../src/cpu/mips/vm/javaFrameAnchor_mips.hpp | 87 + .../cpu/mips/vm/jniFastGetField_mips_64.cpp | 172 + hotspot/src/cpu/mips/vm/jniTypes_mips.hpp | 144 + hotspot/src/cpu/mips/vm/jni_mips.h | 51 + .../src/cpu/mips/vm/macroAssembler_mips.cpp | 4332 +++++ .../src/cpu/mips/vm/macroAssembler_mips.hpp | 701 + .../mips/vm/macroAssembler_mips.inline.hpp | 34 + .../cpu/mips/vm/metaspaceShared_mips_64.cpp | 123 + .../src/cpu/mips/vm/methodHandles_mips.cpp | 576 + .../src/cpu/mips/vm/methodHandles_mips.hpp | 62 + hotspot/src/cpu/mips/vm/mips.ad | 25 + hotspot/src/cpu/mips/vm/mips_64.ad | 14036 ++++++++++++++++ hotspot/src/cpu/mips/vm/nativeInst_mips.cpp | 1829 ++ hotspot/src/cpu/mips/vm/nativeInst_mips.hpp | 735 + hotspot/src/cpu/mips/vm/registerMap_mips.hpp | 47 + .../cpu/mips/vm/register_definitions_mips.cpp | 103 + hotspot/src/cpu/mips/vm/register_mips.cpp | 52 + hotspot/src/cpu/mips/vm/register_mips.hpp | 346 + hotspot/src/cpu/mips/vm/relocInfo_mips.cpp 
| 156 + hotspot/src/cpu/mips/vm/relocInfo_mips.hpp | 40 + hotspot/src/cpu/mips/vm/runtime_mips_64.cpp | 206 + .../src/cpu/mips/vm/sharedRuntime_mips_64.cpp | 3816 +++++ .../src/cpu/mips/vm/stubGenerator_mips_64.cpp | 2147 +++ .../src/cpu/mips/vm/stubRoutines_mips_64.cpp | 35 + .../src/cpu/mips/vm/stubRoutines_mips_64.hpp | 59 + .../vm/templateInterpreterGenerator_mips.hpp | 35 + .../cpu/mips/vm/templateInterpreter_mips.hpp | 41 + .../mips/vm/templateInterpreter_mips_64.cpp | 2306 +++ .../src/cpu/mips/vm/templateTable_mips.hpp | 34 + .../src/cpu/mips/vm/templateTable_mips_64.cpp | 4623 +++++ .../src/cpu/mips/vm/templateTable_mips_64.hpp | 44 + hotspot/src/cpu/mips/vm/vmStructs_mips.hpp | 68 + .../src/cpu/mips/vm/vm_version_ext_mips.cpp | 89 + .../src/cpu/mips/vm/vm_version_ext_mips.hpp | 54 + hotspot/src/cpu/mips/vm/vm_version_mips.cpp | 510 + hotspot/src/cpu/mips/vm/vm_version_mips.hpp | 221 + hotspot/src/cpu/mips/vm/vmreg_mips.cpp | 51 + hotspot/src/cpu/mips/vm/vmreg_mips.hpp | 35 + hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp | 68 + .../src/cpu/mips/vm/vtableStubs_mips_64.cpp | 301 + .../src/cpu/x86/vm/c1_LIRAssembler_x86.cpp | 10 + .../src/cpu/x86/vm/c1_LIRGenerator_x86.cpp | 21 +- hotspot/src/os/linux/vm/os_linux.cpp | 32 +- hotspot/src/os/linux/vm/os_perf_linux.cpp | 6 + .../vm/assembler_linux_loongarch.cpp | 92 + .../vm/atomic_linux_loongarch.inline.hpp | 206 + .../vm/bytes_linux_loongarch.inline.hpp | 37 + .../vm/copy_linux_loongarch.inline.hpp | 125 + .../vm/globals_linux_loongarch.hpp | 43 + .../vm/orderAccess_linux_loongarch.inline.hpp | 115 + .../linux_loongarch/vm/os_linux_loongarch.cpp | 750 + .../linux_loongarch/vm/os_linux_loongarch.hpp | 39 + .../vm/prefetch_linux_loongarch.inline.hpp | 56 + .../vm/threadLS_linux_loongarch.cpp | 84 + .../vm/threadLS_linux_loongarch.hpp | 61 + .../vm/thread_linux_loongarch.cpp | 99 + .../vm/thread_linux_loongarch.hpp | 75 + .../vm/vmStructs_linux_loongarch.hpp | 55 + .../vm/vm_version_linux_loongarch.cpp | 29 + .../linux_mips/vm/assembler_linux_mips.cpp | 111 + .../vm/atomic_linux_mips.inline.hpp | 258 + .../linux_mips/vm/bytes_linux_mips.inline.hpp | 37 + .../linux_mips/vm/copy_linux_mips.inline.hpp | 125 + .../linux_mips/vm/globals_linux_mips.hpp | 51 + .../src/os_cpu/linux_mips/vm/linux_mips.ad | 153 + hotspot/src/os_cpu/linux_mips/vm/linux_mips.s | 25 + .../src/os_cpu/linux_mips/vm/linux_mips_64.ad | 50 + .../vm/orderAccess_linux_mips.inline.hpp | 115 + .../os_cpu/linux_mips/vm/os_linux_mips.cpp | 1015 ++ .../os_cpu/linux_mips/vm/os_linux_mips.hpp | 39 + .../vm/prefetch_linux_mips.inline.hpp | 58 + .../linux_mips/vm/threadLS_linux_mips.cpp | 84 + .../linux_mips/vm/threadLS_linux_mips.hpp | 61 + .../linux_mips/vm/thread_linux_mips.cpp | 99 + .../linux_mips/vm/thread_linux_mips.hpp | 75 + .../linux_mips/vm/vmStructs_linux_mips.hpp | 55 + .../linux_mips/vm/vm_version_linux_mips.cpp | 28 + hotspot/src/share/tools/hsdis/Makefile | 13 + hotspot/src/share/tools/hsdis/hsdis.c | 10 + hotspot/src/share/vm/adlc/main.cpp | 14 + hotspot/src/share/vm/asm/assembler.hpp | 20 + hotspot/src/share/vm/asm/assembler.inline.hpp | 12 + hotspot/src/share/vm/asm/codeBuffer.cpp | 7 + hotspot/src/share/vm/asm/codeBuffer.hpp | 12 + hotspot/src/share/vm/asm/macroAssembler.hpp | 13 +- .../share/vm/asm/macroAssembler.inline.hpp | 12 + hotspot/src/share/vm/asm/register.hpp | 12 + hotspot/src/share/vm/c1/c1_Defs.hpp | 12 + hotspot/src/share/vm/c1/c1_FpuStackSim.hpp | 9 + hotspot/src/share/vm/c1/c1_FrameMap.cpp | 9 + hotspot/src/share/vm/c1/c1_FrameMap.hpp | 9 + 
hotspot/src/share/vm/c1/c1_LIR.cpp | 220 +- hotspot/src/share/vm/c1/c1_LIR.hpp | 199 +- hotspot/src/share/vm/c1/c1_LIRAssembler.cpp | 22 + hotspot/src/share/vm/c1/c1_LIRAssembler.hpp | 12 + hotspot/src/share/vm/c1/c1_LIRGenerator.cpp | 114 +- hotspot/src/share/vm/c1/c1_LIRGenerator.hpp | 17 +- hotspot/src/share/vm/c1/c1_LinearScan.cpp | 81 +- hotspot/src/share/vm/c1/c1_LinearScan.hpp | 9 + hotspot/src/share/vm/c1/c1_MacroAssembler.hpp | 9 + hotspot/src/share/vm/c1/c1_Runtime1.cpp | 48 + hotspot/src/share/vm/c1/c1_globals.hpp | 9 + .../share/vm/classfile/bytecodeAssembler.cpp | 12 + .../share/vm/classfile/classFileStream.hpp | 12 + .../src/share/vm/classfile/stackMapTable.hpp | 12 + hotspot/src/share/vm/classfile/verifier.cpp | 12 + hotspot/src/share/vm/code/codeBlob.cpp | 12 + hotspot/src/share/vm/code/compiledIC.hpp | 12 + hotspot/src/share/vm/code/relocInfo.hpp | 45 +- hotspot/src/share/vm/code/vmreg.hpp | 18 + .../src/share/vm/compiler/disassembler.cpp | 12 + .../src/share/vm/compiler/disassembler.hpp | 12 + .../parallelScavenge/cardTableExtension.hpp | 3 + .../parallelScavenge/parMarkBitMap.cpp | 3 + .../psCompactionManager.inline.hpp | 6 + .../parallelScavenge/psParallelCompact.cpp | 9 + .../parallelScavenge/psParallelCompact.hpp | 6 + .../psPromotionManager.inline.hpp | 24 +- .../parallelScavenge/psScavenge.inline.hpp | 21 +- .../vm/interpreter/abstractInterpreter.hpp | 10 + hotspot/src/share/vm/interpreter/bytecode.hpp | 12 + .../vm/interpreter/bytecodeInterpreter.hpp | 15 + .../bytecodeInterpreter.inline.hpp | 12 + .../share/vm/interpreter/bytecodeStream.hpp | 12 + .../src/share/vm/interpreter/bytecodes.cpp | 12 + .../src/share/vm/interpreter/bytecodes.hpp | 12 + .../share/vm/interpreter/cppInterpreter.hpp | 12 + .../interpreter/cppInterpreterGenerator.hpp | 12 + .../src/share/vm/interpreter/interpreter.hpp | 12 + .../vm/interpreter/interpreterGenerator.hpp | 12 + .../vm/interpreter/interpreterRuntime.cpp | 14 +- .../vm/interpreter/interpreterRuntime.hpp | 14 +- .../vm/interpreter/templateInterpreter.hpp | 12 + .../templateInterpreterGenerator.hpp | 12 + .../share/vm/interpreter/templateTable.hpp | 14 + .../share/vm/jfr/utilities/jfrBigEndian.hpp | 2 +- .../src/share/vm/jfr/writers/jfrEncoders.hpp | 12 + hotspot/src/share/vm/memory/barrierSet.hpp | 25 +- .../src/share/vm/memory/cardTableModRefBS.hpp | 12 +- hotspot/src/share/vm/memory/cardTableRS.cpp | 10 +- hotspot/src/share/vm/memory/cardTableRS.hpp | 9 +- hotspot/src/share/vm/memory/metaspace.cpp | 13 +- hotspot/src/share/vm/oops/constantPool.hpp | 13 + hotspot/src/share/vm/oops/klass.hpp | 17 +- hotspot/src/share/vm/oops/oop.hpp | 8 +- hotspot/src/share/vm/oops/oop.inline.hpp | 12 + hotspot/src/share/vm/oops/oop.pcgc.inline.hpp | 8 +- hotspot/src/share/vm/opto/buildOopMap.cpp | 12 + hotspot/src/share/vm/opto/bytecodeInfo.cpp | 11 + hotspot/src/share/vm/opto/c2_globals.hpp | 12 + hotspot/src/share/vm/opto/c2compiler.cpp | 10 + hotspot/src/share/vm/opto/chaitin.hpp | 14 + hotspot/src/share/vm/opto/compile.cpp | 10 + hotspot/src/share/vm/opto/compile.hpp | 2 +- hotspot/src/share/vm/opto/gcm.cpp | 10 + hotspot/src/share/vm/opto/lcm.cpp | 10 + hotspot/src/share/vm/opto/locknode.hpp | 10 + hotspot/src/share/vm/opto/matcher.cpp | 10 + hotspot/src/share/vm/opto/output.cpp | 43 + hotspot/src/share/vm/opto/output.hpp | 10 + hotspot/src/share/vm/opto/regmask.cpp | 10 + hotspot/src/share/vm/opto/regmask.hpp | 10 + hotspot/src/share/vm/opto/runtime.cpp | 10 + hotspot/src/share/vm/opto/type.cpp | 16 + hotspot/src/share/vm/prims/jniCheck.cpp 
| 12 + hotspot/src/share/vm/prims/jni_md.h | 12 + .../vm/prims/jvmtiClassFileReconstituter.cpp | 12 + hotspot/src/share/vm/prims/methodHandles.hpp | 13 + .../src/share/vm/runtime/atomic.inline.hpp | 12 + .../src/share/vm/runtime/deoptimization.cpp | 18 + hotspot/src/share/vm/runtime/dtraceJSDT.hpp | 12 + hotspot/src/share/vm/runtime/frame.cpp | 13 + hotspot/src/share/vm/runtime/frame.hpp | 16 + hotspot/src/share/vm/runtime/frame.inline.hpp | 18 + hotspot/src/share/vm/runtime/globals.hpp | 26 +- hotspot/src/share/vm/runtime/icache.hpp | 13 +- hotspot/src/share/vm/runtime/java.cpp | 12 + hotspot/src/share/vm/runtime/javaCalls.hpp | 12 + .../src/share/vm/runtime/javaFrameAnchor.hpp | 12 + hotspot/src/share/vm/runtime/os.cpp | 3 +- hotspot/src/share/vm/runtime/os.hpp | 12 + .../src/share/vm/runtime/prefetch.inline.hpp | 6 + hotspot/src/share/vm/runtime/registerMap.hpp | 18 + hotspot/src/share/vm/runtime/relocator.hpp | 12 + hotspot/src/share/vm/runtime/safepoint.cpp | 14 + .../src/share/vm/runtime/sharedRuntime.cpp | 16 +- .../share/vm/runtime/sharedRuntimeTrig.cpp | 15 + .../share/vm/runtime/stackValueCollection.cpp | 12 + hotspot/src/share/vm/runtime/statSampler.cpp | 12 + hotspot/src/share/vm/runtime/stubRoutines.hpp | 16 + hotspot/src/share/vm/runtime/thread.cpp | 6 + hotspot/src/share/vm/runtime/thread.hpp | 12 + .../share/vm/runtime/threadLocalStorage.hpp | 12 + hotspot/src/share/vm/runtime/virtualspace.cpp | 16 + hotspot/src/share/vm/runtime/vmStructs.cpp | 22 + hotspot/src/share/vm/runtime/vm_version.cpp | 20 + hotspot/src/share/vm/utilities/copy.hpp | 13 + hotspot/src/share/vm/utilities/debug.cpp | 1 + .../share/vm/utilities/globalDefinitions.hpp | 12 + hotspot/src/share/vm/utilities/macros.hpp | 30 + hotspot/src/share/vm/utilities/taskqueue.hpp | 74 +- hotspot/src/share/vm/utilities/vmError.cpp | 14 +- .../argumentcorruption/Test8167409.sh | 18 + .../testcases/GenericTestCaseForOtherCPU.java | 5 +- .../sha/predicate/IntrinsicPredicates.java | 10 +- hotspot/test/runtime/6929067/Test6929067.sh | 4 + hotspot/test/runtime/Unsafe/RangeCheck.java | 1 + hotspot/test/test_env.sh | 23 + .../com/oracle/java/testlibrary/Platform.java | 8 + ...stMutuallyExclusivePlatformPredicates.java | 2 +- jdk/make/Images.gmk | 20 + jdk/make/gensrc/GensrcMisc.gmk | 7 + jdk/make/gensrc/GensrcMisc.gmk.orig | 172 + jdk/make/lib/SoundLibraries.gmk | 14 + .../classes/sun/misc/Version.java.template | 10 + .../sun/misc/Version.java.template.orig | 367 + jdk/src/solaris/bin/loongarch64/jvm.cfg | 36 + jdk/src/solaris/bin/mips64/jvm.cfg | 36 + .../jdk/jfr/event/os/TestCPUInformation.java | 4 +- .../bootstrap/linux-loongarch64/launcher | 0 .../bootstrap/linux-mips64el/launcher | 0 jdk/test/sun/security/pkcs11/PKCS11Test.java | 8 + .../sun/security/pkcs11/PKCS11Test.java.orig | 704 + 413 files changed, 112423 insertions(+), 205 deletions(-) create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java create mode 100644 
hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java create mode 100644 hotspot/make/linux/makefiles/loongarch64.make create mode 100644 hotspot/make/linux/makefiles/mips64.make create mode 100644 hotspot/make/linux/platform_loongarch64 create mode 100644 hotspot/make/linux/platform_mips64 create mode 100644 hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp create mode 100644 
hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp create mode 100644 
hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/jni_loongarch.h create mode 100644 hotspot/src/cpu/loongarch/vm/loongarch.ad create mode 100644 hotspot/src/cpu/loongarch/vm/loongarch_64.ad create mode 100644 hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/register_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/register_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/assembler_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/assembler_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp create mode 100644 hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp create mode 100644 hotspot/src/cpu/mips/vm/bytecodes_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/bytecodes_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/bytes_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/c2_globals_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/c2_init_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp create 
mode 100644 hotspot/src/cpu/mips/vm/compiledIC_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/copy_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/debug_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/depChecker_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/depChecker_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/disassembler_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/frame_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/frame_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/frame_mips.inline.hpp create mode 100644 hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/globals_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/icBuffer_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/icache_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/icache_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp create mode 100644 hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/interpreter_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/jniTypes_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/jni_mips.h create mode 100644 hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp create mode 100644 hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/methodHandles_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/methodHandles_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/mips.ad create mode 100644 hotspot/src/cpu/mips/vm/mips_64.ad create mode 100644 hotspot/src/cpu/mips/vm/nativeInst_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/nativeInst_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/registerMap_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/register_definitions_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/register_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/register_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/relocInfo_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/relocInfo_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/runtime_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp create mode 100644 hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/templateTable_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp create mode 100644 
hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp create mode 100644 hotspot/src/cpu/mips/vm/vmStructs_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/vm_version_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/vm_version_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/vmreg_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/vmreg_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp create mode 100644 hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad create mode 100644 hotspot/src/os_cpu/linux_mips/vm/linux_mips.s create mode 100644 hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad create mode 100644 hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp create mode 100644 jdk/make/gensrc/GensrcMisc.gmk.orig create mode 100644 jdk/src/share/classes/sun/misc/Version.java.template.orig create mode 100644 jdk/src/solaris/bin/loongarch64/jvm.cfg create mode 100644 jdk/src/solaris/bin/mips64/jvm.cfg create mode 100755 
 jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher
 create mode 100644 jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher
 create mode 100644 jdk/test/sun/security/pkcs11/PKCS11Test.java.orig

diff --git a/common/autoconf/build-aux/autoconf-config.guess b/common/autoconf/build-aux/autoconf-config.guess
index 15ee4389269..3d7555b52d3 100644
--- a/common/autoconf/build-aux/autoconf-config.guess
+++ b/common/autoconf/build-aux/autoconf-config.guess
@@ -977,6 +977,9 @@ EOF
         eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
         test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
         ;;
+    loongarch64:Linux:*:*)
+        echo ${UNAME_MACHINE}-unknown-linux-gnu
+        exit ;;
     or32:Linux:*:*)
         echo ${UNAME_MACHINE}-unknown-linux-gnu
         exit ;;
diff --git a/common/autoconf/build-aux/autoconf-config.sub b/common/autoconf/build-aux/autoconf-config.sub
index 1aab2b303e3..bd910bddbe1 100644
--- a/common/autoconf/build-aux/autoconf-config.sub
+++ b/common/autoconf/build-aux/autoconf-config.sub
@@ -275,6 +275,7 @@ case $basic_machine in
 	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
 	| i370 | i860 | i960 | ia64 \
 	| ip2k | iq2000 \
+	| loongarch | loongarch64 \
 	| m32c | m32r | m32rle | m68000 | m68k | m88k \
 	| maxq | mb | microblaze | mcore | mep \
 	| mips | mipsbe | mipseb | mipsel | mipsle \
diff --git a/common/autoconf/build-aux/config.guess b/common/autoconf/build-aux/config.guess
index 355c91e4ebb..d03d029ce39 100644
--- a/common/autoconf/build-aux/config.guess
+++ b/common/autoconf/build-aux/config.guess
@@ -86,4 +86,15 @@ if [ "x$OUT" = x ]; then
   fi
 fi
 
+# Test and fix little endian MIPS.
+if [ "x$OUT" = x ]; then
+  if [ `uname -s` = Linux ]; then
+    if [ `uname -m` = mipsel ]; then
+      OUT=mipsel-unknown-linux-gnu
+    elif [ `uname -m` = mips64el ]; then
+      OUT=mips64el-unknown-linux-gnu
+    fi
+  fi
+fi
+
 echo $OUT
diff --git a/common/autoconf/configure.ac b/common/autoconf/configure.ac
index 151e5a109f8..5072409dd4c 100644
--- a/common/autoconf/configure.ac
+++ b/common/autoconf/configure.ac
@@ -23,6 +23,12 @@
 # questions.
 #
 
+#
+# This file has been modified by Loongson Technology in 2018. These
+# modifications are Copyright (c) 2018 Loongson Technology, and are made
+# available on the same license terms set forth above.
+#
+
 ###############################################################################
 #
 # Includes and boilerplate
@@ -186,6 +192,7 @@ FLAGS_SETUP_INIT_FLAGS
 # Now we can test some aspects on the target using configure macros.
 PLATFORM_SETUP_OPENJDK_TARGET_BITS
 PLATFORM_SETUP_OPENJDK_TARGET_ENDIANNESS
+GET_BUILDER_AND_HOST_DATA
 
 # Configure flags for the tools
 FLAGS_SETUP_COMPILER_FLAGS_FOR_LIBS
diff --git a/common/autoconf/generated-configure.sh b/common/autoconf/generated-configure.sh
index a89cb30f373..19b18ece406 100644
--- a/common/autoconf/generated-configure.sh
+++ b/common/autoconf/generated-configure.sh
@@ -716,6 +716,9 @@ SET_EXECUTABLE_ORIGIN
 SHARED_LIBRARY_FLAGS
 CXX_FLAG_REORDER
 C_FLAG_REORDER
+HOST_NAME
+BUILDER_NAME
+BUILDER_ID
 SYSROOT_LDFLAGS
 SYSROOT_CFLAGS
 RC_FLAGS
@@ -4069,6 +4072,12 @@ fi
 # questions.
 #
 
+#
+# This file has been modified by Loongson Technology in 2022. These
+# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made
+# available on the same license terms set forth above.
+#
+
 # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD.
 # Converts autoconf style CPU name to OpenJDK style, into
 # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN.
@@ -13918,6 +13927,18 @@ test -n "$target_alias" &&
       VAR_CPU_BITS=64
       VAR_CPU_ENDIAN=big
       ;;
+    mips64el)
+      VAR_CPU=mips64
+      VAR_CPU_ARCH=mips
+      VAR_CPU_BITS=64
+      VAR_CPU_ENDIAN=little
+      ;;
+    loongarch64)
+      VAR_CPU=loongarch64
+      VAR_CPU_ARCH=loongarch
+      VAR_CPU_BITS=64
+      VAR_CPU_ENDIAN=little
+      ;;
     *)
       as_fn_error $? "unsupported cpu $build_cpu" "$LINENO" 5
       ;;
@@ -14056,6 +14077,18 @@ $as_echo "$OPENJDK_BUILD_OS-$OPENJDK_BUILD_CPU" >&6; }
       VAR_CPU_BITS=64
       VAR_CPU_ENDIAN=big
       ;;
+    mips64el)
+      VAR_CPU=mips64
+      VAR_CPU_ARCH=mips
+      VAR_CPU_BITS=64
+      VAR_CPU_ENDIAN=little
+      ;;
+    loongarch64)
+      VAR_CPU=loongarch64
+      VAR_CPU_ARCH=loongarch
+      VAR_CPU_BITS=64
+      VAR_CPU_ENDIAN=little
+      ;;
     *)
       as_fn_error $? "unsupported cpu $host_cpu" "$LINENO" 5
       ;;
@@ -14178,6 +14211,8 @@ $as_echo "$COMPILE_TYPE" >&6; }
     OPENJDK_TARGET_CPU_LEGACY_LIB="i386"
   elif test "x$OPENJDK_TARGET_CPU" = xx86_64; then
     OPENJDK_TARGET_CPU_LEGACY_LIB="amd64"
+  elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then
+    OPENJDK_TARGET_CPU_LEGACY_LIB="mips64el"
   fi
 
 
@@ -14211,6 +14246,9 @@ $as_echo "$COMPILE_TYPE" >&6; }
   elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then
     # On all platforms except macosx, we replace x86_64 with amd64.
     OPENJDK_TARGET_CPU_OSARCH="amd64"
+  elif test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then
+    # System.getProperty("os.arch"): mips64 -> mips64el
+    OPENJDK_TARGET_CPU_OSARCH="mips64el"
   fi
 
 
@@ -14220,6 +14258,8 @@ $as_echo "$COMPILE_TYPE" >&6; }
   elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then
     # On all platforms except macosx, we replace x86_64 with amd64.
     OPENJDK_TARGET_CPU_JLI="amd64"
+  elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then
+    OPENJDK_TARGET_CPU_JLI="mips64el"
   fi
 # Now setup the -D flags for building libjli.
 OPENJDK_TARGET_CPU_JLI_CFLAGS="-DLIBARCHNAME='\"$OPENJDK_TARGET_CPU_JLI\"'"
@@ -14232,6 +14272,9 @@ $as_echo "$COMPILE_TYPE" >&6; }
   elif test "x$OPENJDK_TARGET_OS" = xmacosx && test "x$TOOLCHAIN_TYPE" = xclang ; then
     OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -stdlib=libc++ -mmacosx-version-min=\$(MACOSX_VERSION_MIN)"
   fi
+  if test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then
+    OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -DLIBARCH32NAME='\"mips32el\"' -DLIBARCH64NAME='\"mips64el\"'"
+  fi
 
 
 # Setup OPENJDK_TARGET_OS_API_DIR, used in source paths.
@@ -42412,6 +42455,47 @@ $as_echo "$ac_cv_c_bigendian" >&6; } fi +BUILDER_NAME="$build_os" +BUILDER_ID="Custom build ($(date))" +if test -f /etc/issue; then + etc_issue_info=`cat /etc/issue` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/issue | head -n 1 | cut -d " " -f 1` + fi +fi +if test -f /etc/redhat-release; then + etc_issue_info=`cat /etc/redhat-release` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/redhat-release | head -n 1 | cut -d " " -f 1` + fi +fi +if test -f /etc/neokylin-release; then + etc_issue_info=`cat /etc/neokylin-release` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/neokylin-release | head -n 1 | cut -d " " -f 1` + fi +fi +if test -z "$BUILDER_NAME"; then + BUILDER_NAME="unknown" +fi +BUILDER_NAME=`echo $BUILDER_NAME | sed -r "s/-//g"` +if test -n "$OPENJDK_TARGET_CPU_OSARCH"; then + HOST_NAME="$OPENJDK_TARGET_CPU_OSARCH" +else + HOST_NAME="unknown" +fi +if test -f "/usr/bin/cpp"; then + # gcc_with_arch_info=`gcc -v 2>&1 | grep '\-\-with-arch=' | sed 's/.*--with-arch=//;s/ .*$//'` + gcc_with_arch_info=`cpp -dM /dev/null | grep '\<_MIPS_ARCH\>' | sed 's/^#define _MIPS_ARCH "//;s/"$//'` + if test -n "$gcc_with_arch_info"; then + HOST_NAME="$gcc_with_arch_info" + fi +fi + + + + + # Configure flags for the tools ############################################################################### diff --git a/common/autoconf/platform.m4 b/common/autoconf/platform.m4 index 51df988f619..51cc28c312f 100644 --- a/common/autoconf/platform.m4 +++ b/common/autoconf/platform.m4 @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2022. These +# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. # Converts autoconf style CPU name to OpenJDK style, into # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. @@ -96,6 +102,18 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], VAR_CPU_BITS=64 VAR_CPU_ENDIAN=big ;; + mips64el) + VAR_CPU=mips64 + VAR_CPU_ARCH=mips + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=little + ;; + loongarch64) + VAR_CPU=loongarch64 + VAR_CPU_ARCH=loongarch + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=little + ;; *) AC_MSG_ERROR([unsupported cpu $1]) ;; @@ -283,6 +301,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], OPENJDK_TARGET_CPU_LEGACY_LIB="i386" elif test "x$OPENJDK_TARGET_CPU" = xx86_64; then OPENJDK_TARGET_CPU_LEGACY_LIB="amd64" + elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then + OPENJDK_TARGET_CPU_LEGACY_LIB="mips64el" fi AC_SUBST(OPENJDK_TARGET_CPU_LEGACY_LIB) @@ -316,6 +336,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then # On all platforms except macosx, we replace x86_64 with amd64. OPENJDK_TARGET_CPU_OSARCH="amd64" + elif test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then + # System.getProperty("os.arch"): mips64 -> mips64el + OPENJDK_TARGET_CPU_OSARCH="mips64el" fi AC_SUBST(OPENJDK_TARGET_CPU_OSARCH) @@ -325,6 +348,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then # On all platforms except macosx, we replace x86_64 with amd64. 
OPENJDK_TARGET_CPU_JLI="amd64" + elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then + OPENJDK_TARGET_CPU_JLI="mips64el" fi # Now setup the -D flags for building libjli. OPENJDK_TARGET_CPU_JLI_CFLAGS="-DLIBARCHNAME='\"$OPENJDK_TARGET_CPU_JLI\"'" @@ -337,6 +362,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], elif test "x$OPENJDK_TARGET_OS" = xmacosx && test "x$TOOLCHAIN_TYPE" = xclang ; then OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -stdlib=libc++ -mmacosx-version-min=\$(MACOSX_VERSION_MIN)" fi + if test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then + OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -DLIBARCH32NAME='\"mips32el\"' -DLIBARCH64NAME='\"mips64el\"'" + fi AC_SUBST(OPENJDK_TARGET_CPU_JLI_CFLAGS) # Setup OPENJDK_TARGET_OS_API_DIR, used in source paths. @@ -550,3 +578,46 @@ AC_DEFUN_ONCE([PLATFORM_SETUP_OPENJDK_TARGET_ENDIANNESS], AC_MSG_ERROR([The tested endian in the target ($ENDIAN) differs from the endian expected to be found in the target ($OPENJDK_TARGET_CPU_ENDIAN)]) fi ]) + +AC_DEFUN([GET_BUILDER_AND_HOST_DATA], +[ +BUILDER_NAME="$build_os" +BUILDER_ID="Custom build ($(date))" +if test -f /etc/issue; then + etc_issue_info=`cat /etc/issue` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/issue | head -n 1 | cut -d " " -f 1` + fi +fi +if test -f /etc/redhat-release; then + etc_issue_info=`cat /etc/redhat-release` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/redhat-release | head -n 1 | cut -d " " -f 1` + fi +fi +if test -f /etc/neokylin-release; then + etc_issue_info=`cat /etc/neokylin-release` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/neokylin-release | head -n 1 | cut -d " " -f 1` + fi +fi +if test -z "$BUILDER_NAME"; then + BUILDER_NAME="unknown" +fi +BUILDER_NAME=`echo $BUILDER_NAME | sed -r "s/-//g"` +if test -n "$OPENJDK_TARGET_CPU_OSARCH"; then + HOST_NAME="$OPENJDK_TARGET_CPU_OSARCH" +else + HOST_NAME="unknown" +fi +if test -f "/usr/bin/cpp"; then + # gcc_with_arch_info=`gcc -v 2>&1 | grep '\-\-with-arch=' | sed 's/.*--with-arch=//;s/ .*$//'` + gcc_with_arch_info=`cpp -dM /dev/null | grep '\<_MIPS_ARCH\>' | sed 's/^#define _MIPS_ARCH "//;s/"$//'` + if test -n "$gcc_with_arch_info"; then + HOST_NAME="$gcc_with_arch_info" + fi +fi +AC_SUBST(BUILDER_ID) +AC_SUBST(BUILDER_NAME) +AC_SUBST(HOST_NAME) +]) diff --git a/common/autoconf/spec.gmk.in b/common/autoconf/spec.gmk.in index 0263c4dbb30..9d039103086 100644 --- a/common/autoconf/spec.gmk.in +++ b/common/autoconf/spec.gmk.in @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2023. These +# modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + # Configured @DATE_WHEN_CONFIGURED@ to build # for target system @OPENJDK_TARGET_OS@-@OPENJDK_TARGET_CPU@ # (called @OPENJDK_TARGET_AUTOCONF_NAME@ by autoconf) @@ -219,6 +225,23 @@ else endif JRE_RELEASE_VERSION:=$(FULL_VERSION) +# Build OS and host values for use in Loongson OpenJDK release +BUILDER_ID:=@BUILDER_ID@ +BUILDER_NAME:=@BUILDER_NAME@ +HOST_NAME:=@HOST_NAME@ + +# Loongson OpenJDK Version info +VER=8.1.18 +ifeq ($(HOST_NAME), ) + HOST_NAME=unknown +endif +ifeq ($(BUILDER_NAME), ) + BUILDER_NAME=unknown +endif +HOST_NAME_STRING=-$(HOST_NAME) +BUILDER_NAME_STRING=-$(BUILDER_NAME) +LOONGSON_RUNTIME_NAME=Loongson $(VER)$(HOST_NAME_STRING)$(BUILDER_NAME_STRING) + # How to compile the code: release, fastdebug or slowdebug DEBUG_LEVEL:=@DEBUG_LEVEL@ diff --git a/hotspot/agent/make/saenv.sh b/hotspot/agent/make/saenv.sh index ab9a0a431c4..a2de3fc3291 100644 --- a/hotspot/agent/make/saenv.sh +++ b/hotspot/agent/make/saenv.sh @@ -23,6 +23,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # This file sets common environment variables for all SA scripts OS=`uname` @@ -42,6 +48,14 @@ if [ "$OS" = "Linux" ]; then SA_LIBPATH=$STARTDIR/../src/os/linux/amd64:$STARTDIR/linux/amd64 OPTIONS="-Dsa.library.path=$SA_LIBPATH" CPU=amd64 + elif [ "$ARCH" = "mips64" ] ; then + SA_LIBPATH=$STARTDIR/../src/os/linux/mips:$STARTDIR/linux/mips + OPTIONS="-Dsa.library.path=$SA_LIBPATH" + CPU=mips + elif [ "$ARCH" = "loongarch64" ] ; then + SA_LIBPATH=$STARTDIR/../src/os/linux/loongarch64:$STARTDIR/linux/loongarch64 + OPTIONS="-Dsa.library.path=$SA_LIBPATH" + CPU=loongarch64 else SA_LIBPATH=$STARTDIR/../src/os/linux/i386:$STARTDIR/linux/i386 OPTIONS="-Dsa.library.path=$SA_LIBPATH" diff --git a/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c b/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c index d6a0c7d9a93..b3b1380b298 100644 --- a/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c +++ b/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c @@ -22,6 +22,13 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + * + */ + #include #include "libproc.h" @@ -49,10 +56,18 @@ #include "sun_jvm_hotspot_debugger_sparc_SPARCThreadContext.h" #endif +#if defined(mips64el) || defined(mips64) +#include "sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext.h" +#endif + #ifdef aarch64 #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" #endif +#ifdef loongarch64 +#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" +#endif + static jfieldID p_ps_prochandle_ID = 0; static jfieldID threadList_ID = 0; static jfieldID loadObjectList_ID = 0; @@ -337,7 +352,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo return (err == PS_OK)? 
array : 0; } -#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(aarch64) +#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(aarch64) || defined(loongarch64) JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { @@ -364,6 +379,12 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo #endif #if defined(sparc) || defined(sparcv9) #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG +#endif +#ifdef loongarch64 +#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG +#endif +#if defined(mips64) || defined(mips64el) +#define NPRGREG sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_NPRGREG #endif array = (*env)->NewLongArray(env, NPRGREG); @@ -470,6 +491,55 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo } #endif /* aarch64 */ +#if defined(loongarch64) + +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg + + { + int i; + for (i = 0; i < 31; i++) + regs[i] = gregs.regs[i]; + regs[REG_INDEX(PC)] = gregs.csr_era; + } +#endif /* loongarch64 */ +#if defined(mips64) || defined(mips64el) + +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_##reg + + regs[REG_INDEX(ZERO)] = gregs.regs[0]; + regs[REG_INDEX(AT)] = gregs.regs[1]; + regs[REG_INDEX(V0)] = gregs.regs[2]; + regs[REG_INDEX(V1)] = gregs.regs[3]; + regs[REG_INDEX(A0)] = gregs.regs[4]; + regs[REG_INDEX(A1)] = gregs.regs[5]; + regs[REG_INDEX(A2)] = gregs.regs[6]; + regs[REG_INDEX(A3)] = gregs.regs[7]; + regs[REG_INDEX(T0)] = gregs.regs[8]; + regs[REG_INDEX(T1)] = gregs.regs[9]; + regs[REG_INDEX(T2)] = gregs.regs[10]; + regs[REG_INDEX(T3)] = gregs.regs[11]; + regs[REG_INDEX(T4)] = gregs.regs[12]; + regs[REG_INDEX(T5)] = gregs.regs[13]; + regs[REG_INDEX(T6)] = gregs.regs[14]; + regs[REG_INDEX(T7)] = gregs.regs[15]; + regs[REG_INDEX(S0)] = gregs.regs[16]; + regs[REG_INDEX(S1)] = gregs.regs[17]; + regs[REG_INDEX(S2)] = gregs.regs[18]; + regs[REG_INDEX(S3)] = gregs.regs[19]; + regs[REG_INDEX(S4)] = gregs.regs[20]; + regs[REG_INDEX(S5)] = gregs.regs[21]; + regs[REG_INDEX(S6)] = gregs.regs[22]; + regs[REG_INDEX(S7)] = gregs.regs[23]; + regs[REG_INDEX(T8)] = gregs.regs[24]; + regs[REG_INDEX(T9)] = gregs.regs[25]; + regs[REG_INDEX(K0)] = gregs.regs[26]; + regs[REG_INDEX(K1)] = gregs.regs[27]; + regs[REG_INDEX(GP)] = gregs.regs[28]; + regs[REG_INDEX(SP)] = gregs.regs[29]; + regs[REG_INDEX(FP)] = gregs.regs[30]; + regs[REG_INDEX(S8)] = gregs.regs[30]; + regs[REG_INDEX(RA)] = gregs.regs[31]; +#endif /* mips64 */ (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); return array; diff --git a/hotspot/agent/src/os/linux/Makefile b/hotspot/agent/src/os/linux/Makefile index c0b5c869c1f..2cc50b6fab1 100644 --- a/hotspot/agent/src/os/linux/Makefile +++ b/hotspot/agent/src/os/linux/Makefile @@ -22,7 +22,13 @@ # # -ARCH := $(shell if ([ `uname -m` = "ia64" ]) ; then echo ia64 ; elif ([ `uname -m` = "x86_64" ]) ; then echo amd64; elif ([ `uname -m` = "sparc64" ]) ; then echo sparc; else echo i386 ; fi ) +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + +ARCH := $(shell if ([ `uname -m` = "ia64" ]) ; then echo ia64 ; elif ([ `uname -m` = "mips64el" ]) ; then echo mips64 ; elif ([ `uname -m` = "x86_64" ]) ; then echo amd64; elif ([ `uname -m` = "sparc64" ]) ; then echo sparc; else echo i386 ; fi ) GCC = gcc JAVAH = ${JAVA_HOME}/bin/javah @@ -53,6 +59,8 @@ $(ARCH)/LinuxDebuggerLocal.o: LinuxDebuggerLocal.c $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \ sun.jvm.hotspot.debugger.x86.X86ThreadContext \ sun.jvm.hotspot.debugger.sparc.SPARCThreadContext \ + sun.jvm.hotspot.debugger.mips64.MIPS64ThreadContext \ + sun.jvm.hotspot.debugger.loongarch64.LOONGARCH64ThreadContext \ sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext \ sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext $(GCC) $(CFLAGS) $< -o $@ diff --git a/hotspot/agent/src/os/linux/libproc.h b/hotspot/agent/src/os/linux/libproc.h index 6b6e41cab47..5eb8211aa93 100644 --- a/hotspot/agent/src/os/linux/libproc.h +++ b/hotspot/agent/src/os/linux/libproc.h @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef _LIBPROC_H_ #define _LIBPROC_H_ @@ -36,7 +42,7 @@ #include -#if defined(aarch64) +#if defined(aarch64) || defined(loongarch64) #include "asm/ptrace.h" #endif @@ -76,7 +82,12 @@ combination of ptrace and /proc calls. #include #define user_regs_struct pt_regs #endif -#if defined(aarch64) + +#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) +#include +#define user_regs_struct pt_regs +#endif +#if defined(aarch64) || defined(loongarch64) #define user_regs_struct user_pt_regs #endif diff --git a/hotspot/agent/src/os/linux/ps_proc.c b/hotspot/agent/src/os/linux/ps_proc.c index c4d6a9ecc5d..7000e927235 100644 --- a/hotspot/agent/src/os/linux/ps_proc.c +++ b/hotspot/agent/src/os/linux/ps_proc.c @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include #include #include @@ -141,7 +147,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use #define PTRACE_GETREGS_REQ PT_GETREGS #endif -#ifdef PTRACE_GETREGS_REQ +#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); return false; diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java index c963350591d..20e6f35b9cf 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ * + */ package sun.jvm.hotspot; import java.rmi.RemoteException; @@ -37,6 +43,8 @@ import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; +import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; +import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; import sun.jvm.hotspot.debugger.NoSuchSymbolException; import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; @@ -594,6 +602,10 @@ private void setupDebuggerLinux() { } else { machDesc = new MachineDescriptionSPARC32Bit(); } + } else if (cpu.equals("mips64")) { + machDesc = new MachineDescriptionMIPS64(); + } else if (cpu.equals("loongarch64")) { + machDesc = new MachineDescriptionLOONGARCH64(); } else { try { machDesc = (MachineDescription) diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java index 993bf7bb477..1e075aa57ee 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java @@ -94,6 +94,12 @@ private Disassembler(long startPc, byte[] code) { } else if (arch.equals("amd64") || arch.equals("x86_64")) { path.append(sep + "lib" + sep + "amd64" + sep); libname += "-amd64.so"; + } else if (arch.equals("mips64") || arch.equals("mips64el")) { + path.append(sep + "lib" + sep + "mips64" + sep); + libname += "-mips64.so"; + } else if (arch.equals("loongarch64")) { + path.append(sep + "lib" + sep + "loongarch64" + sep); + libname += "-loongarch64.so"; } else { path.append(sep + "lib" + sep + arch + sep); libname += "-" + arch + ".so"; diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java new file mode 100644 index 00000000000..0531427dabb --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 8; + } + + + public boolean isBigEndian() { + return false; + } + + public boolean isLP64() { + return true; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java new file mode 100644 index 00000000000..1b49efd2017 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 8; + } + + + public boolean isBigEndian() { + return "big".equals(System.getProperty("sun.cpu.endian")); + } + + public boolean isLP64() { + return true; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java index f178d6a6e7f..019e794bbb4 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -32,11 +32,15 @@ import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.sparc.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.loongarch64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.linux.aarch64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; +import sun.jvm.hotspot.debugger.linux.mips64.*; +import sun.jvm.hotspot.debugger.linux.loongarch64.*; import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { @@ -106,6 +110,20 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); if (pc == null) return null; return new LinuxAARCH64CFrame(dbg, fp, pc); + } else if (cpu.equals("mips64")) { + MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); + if (sp == null) return null; + Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); + if (pc == null) return null; + return new LinuxMIPS64CFrame(dbg, sp, pc); + } else if (cpu.equals("loongarch64")) { + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + if (sp == null) return null; + Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxLOONGARCH64CFrame(dbg, sp, pc); } else { // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu ThreadContext context = (ThreadContext) thread.getContext(); diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java index 44c2265d7a0..3b6747ac0a3 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java @@ -30,6 +30,8 @@ import sun.jvm.hotspot.debugger.linux.ia64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.sparc.*; +import sun.jvm.hotspot.debugger.linux.mips64.*; +import sun.jvm.hotspot.debugger.linux.loongarch64.*; class LinuxThreadContextFactory { static ThreadContext createThreadContext(LinuxDebugger dbg) { @@ -42,6 +44,10 @@ static ThreadContext createThreadContext(LinuxDebugger dbg) { return new LinuxIA64ThreadContext(dbg); } else if (cpu.equals("sparc")) { return new LinuxSPARCThreadContext(dbg); + } else if 
(cpu.equals("mips64")) { + return new LinuxMIPS64ThreadContext(dbg); + } else if (cpu.equals("loongarch64")) { + return new LinuxLOONGARCH64ThreadContext(dbg); } else { try { Class tcc = Class.forName("sun.jvm.hotspot.debugger.linux." + diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java new file mode 100644 index 00000000000..3b20dbbd87e --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; +import sun.jvm.hotspot.debugger.loongarch64.*; + +final public class LinuxLOONGARCH64CFrame extends BasicCFrame { + // package/class internals only + public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { + super(dbg.getCDebugger()); + this.fp = fp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. 
+ return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return fp; + } + + public CFrame sender(ThreadProxy thread) { + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + + if ((fp == null) || fp.lessThan(sp)) { + return null; + } + + Address nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); + if (nextFP == null) { + return null; + } + Address nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); + } + + private static final int ADDRESS_SIZE = 8; + private Address pc; + private Address fp; + private LinuxDebugger dbg; +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..9f22133eaff --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { + private LinuxDebugger debugger; + + public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java new file mode 100644 index 00000000000..2e3eb564da2 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; +import sun.jvm.hotspot.debugger.mips64.*; + +final public class LinuxMIPS64CFrame extends BasicCFrame { + // package/class internals only + public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { + super(dbg.getCDebugger()); + this.ebp = ebp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. + return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return ebp; + } + + public CFrame sender(ThreadProxy thread) { + MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); + Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); + + if ( (ebp == null) || ebp.lessThan(esp) ) { + return null; + } + + Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); + if (nextEBP == null) { + return null; + } + Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); + } + + private static final int ADDRESS_SIZE = 4; + private Address pc; + private Address ebp; + private LinuxDebugger dbg; +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java new file mode 100644 index 00000000000..98e0f3f0bcf --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { + private LinuxDebugger debugger; + + public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..90b0cf97e35 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on loongarch64 platforms; only a sub-portion + of the context is guaranteed to be present on all operating + systems. */ + +public abstract class LOONGARCH64ThreadContext implements ThreadContext { + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work): EAX, EBX, + // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. 
+ + public static final int ZERO = 0; + public static final int RA = 1; + public static final int TP = 2; + public static final int SP = 3; + public static final int A0 = 4; + public static final int A1 = 5; + public static final int A2 = 6; + public static final int A3 = 7; + public static final int A4 = 8; + public static final int A5 = 9; + public static final int A6 = 10; + public static final int A7 = 11; + public static final int T0 = 12; + public static final int T1 = 13; + public static final int T2 = 14; + public static final int T3 = 15; + public static final int T4 = 16; + public static final int T5 = 17; + public static final int T6 = 18; + public static final int T7 = 19; + public static final int T8 = 20; + public static final int RX = 21; + public static final int FP = 22; + public static final int S0 = 23; + public static final int S1 = 24; + public static final int S2 = 25; + public static final int S3 = 26; + public static final int S4 = 27; + public static final int S5 = 28; + public static final int S6 = 29; + public static final int S7 = 30; + public static final int S8 = 31; + public static final int PC = 32; + public static final int NPRGREG = 33; + + private static final String[] regNames = { + "ZERO", "RA", "TP", "SP", + "A0", "A1", "A2", "A3", + "A4", "A5", "A6", "A7", + "T0", "T1", "T2", "T3", + "T4", "T5", "T6", "T7", + "T8", "RX", "FP", "S0", + "S1", "S2", "S3", "S4", + "S5", "S6", "S7", "S8", + "PC" + }; + + private long[] data; + + public LOONGARCH64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + return regNames[index]; + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java new file mode 100644 index 00000000000..c57ee9dfc97 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on mips64 platforms; only a sub-portion + of the context is guaranteed to be present on all operating + systems. */ + +public abstract class MIPS64ThreadContext implements ThreadContext { + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work): EAX, EBX, + // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. + + public static final int ZERO = 0; + public static final int AT = 1; + public static final int V0 = 2; + public static final int V1 = 3; + public static final int A0 = 4; + public static final int A1 = 5; + public static final int A2 = 6; + public static final int A3 = 7; + public static final int T0 = 8; + public static final int T1 = 9; + public static final int T2 = 10; + public static final int T3 = 11; + public static final int T4 = 12; + public static final int T5 = 13; + public static final int T6 = 14; + public static final int T7 = 15; + public static final int S0 = 16; + public static final int S1 = 17; + public static final int S2 = 18; + public static final int S3 = 19; + public static final int S4 = 20; + public static final int S5 = 21; + public static final int S6 = 22; + public static final int S7 = 23; + public static final int T8 = 24; + public static final int T9 = 25; + public static final int K0 = 26; + public static final int K1 = 27; + public static final int GP = 28; + public static final int SP = 29; + public static final int FP = 30; + public static final int RA = 31; + public static final int PC = 32; + public static final int NPRGREG = 33; + + private static final String[] regNames = { + "ZERO", "AT", "V0", "V1", + "A0", "A1", "A2", "A3", + "T0", "T1", "T2", "T3", + "T4", "T5", "T6", "T7", + "S0", "S1", "S2", "S3", + "S4", "S5", "S6", "S7", + "T8", "T9", "K0", "K1", + "GP", "SP", "FP", "RA", + "PC" + }; + + private long[] data; + + public MIPS64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + return regNames[index]; + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java 
index 7113a3a497b..24273888c2d 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java @@ -63,6 +63,8 @@ public interface ELFHeader { public static final int ARCH_i860 = 7; /** MIPS architecture type. */ public static final int ARCH_MIPS = 8; + /** LOONGARCH architecture type. */ + public static final int ARCH_LOONGARCH = 9; /** Returns a file type which is defined by the file type constants. */ public short getFileType(); diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java index ca1a2575ff4..2afa6c55f88 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java @@ -34,10 +34,14 @@ import sun.jvm.hotspot.debugger.proc.aarch64.*; import sun.jvm.hotspot.debugger.proc.sparc.*; import sun.jvm.hotspot.debugger.proc.x86.*; +import sun.jvm.hotspot.debugger.proc.mips64.*; +import sun.jvm.hotspot.debugger.proc.loongarch64.*; import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; import sun.jvm.hotspot.debugger.x86.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.loongarch64.*; import sun.jvm.hotspot.utilities.*; /**
An implementation of the JVMDebugger interface which sits on @@ -92,6 +96,14 @@ public ProcDebuggerLocal(MachineDescription machDesc, boolean useCache) { threadFactory = new ProcAARCH64ThreadFactory(this); pcRegIndex = AARCH64ThreadContext.PC; fpRegIndex = AARCH64ThreadContext.FP; + } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { + threadFactory = new ProcMIPS64ThreadFactory(this); + pcRegIndex = MIPS64ThreadContext.PC; + fpRegIndex = MIPS64ThreadContext.FP; + } else if (cpu.equals("loongarch64")) { + threadFactory = new ProcLOONGARCH64ThreadFactory(this); + pcRegIndex = LOONGARCH64ThreadContext.PC; + fpRegIndex = LOONGARCH64ThreadContext.FP; } else { try { Class tfc = Class.forName("sun.jvm.hotspot.debugger.proc." + diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java new file mode 100644 index 00000000000..42a31e3486c --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcLOONGARCH64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. + this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + /* + _NGREG in reg.h is defined to be 19. Because we have included + debug registers LOONGARCH64ThreadContext.NPRGREG is 25. 
+ */ + + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { + return false; + } + + return (((ProcLOONGARCH64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..9054f16506a --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { + private ProcDebugger debugger; + + public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java new file mode 100644 index 00000000000..bc643351244 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcLOONGARCH64Thread(debugger, id); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java new file mode 100644 index 00000000000..5c1e0be8932 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.proc.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcMIPS64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. + this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcMIPS64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + /* + _NGREG in reg.h is defined to be 19. Because we have included + debug registers MIPS64ThreadContext.NPRGREG is 25. + */ + + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { + return false; + } + + return (((ProcMIPS64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java new file mode 100644 index 00000000000..d44223d768a --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.proc.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { + private ProcDebugger debugger; + + public ProcMIPS64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java new file mode 100644 index 00000000000..bad478fc5ca --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcMIPS64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcMIPS64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcMIPS64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcMIPS64Thread(debugger, id); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java index ffa61b548e7..9cf3ee2da33 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java @@ -33,6 +33,8 @@ import sun.jvm.hotspot.debugger.remote.sparc.*; import sun.jvm.hotspot.debugger.remote.x86.*; import sun.jvm.hotspot.debugger.remote.amd64.*; +import sun.jvm.hotspot.debugger.remote.mips64.*; +import sun.jvm.hotspot.debugger.remote.loongarch64.*; /** An implementation of Debugger which wraps a RemoteDebugger, providing remote debugging via RMI. 
@@ -70,6 +72,16 @@ public RemoteDebuggerClient(RemoteDebugger remoteDebugger) throws DebuggerExcept cachePageSize = 4096; cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); unalignedAccessesOkay = true; + } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { + threadFactory = new RemoteMIPS64ThreadFactory(this); + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; + } else if (cpu.equals("loongarch64")) { + threadFactory = new RemoteLOONGARCH64ThreadFactory(this); + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; } else { try { Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." + diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java new file mode 100644 index 00000000000..01e3f8954bb --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteLOONGARCH64Thread extends RemoteThread { + public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); + long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..ad25bccc8d2 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java new file mode 100644 index 00000000000..d8bf50ea5ba --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteLOONGARCH64Thread(debugger, id); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java new file mode 100644 index 00000000000..a9285a3b946 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.remote.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteMIPS64Thread extends RemoteThread { + public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); + long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java new file mode 100644 index 00000000000..4d711f9ba7c --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.remote.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java new file mode 100644 index 00000000000..020a2f1ff96 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.remote.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteMIPS64Thread(debugger, id); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java index 842a3b357dd..81efdd02f86 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -34,6 +34,8 @@ import sun.jvm.hotspot.runtime.win32_x86.Win32X86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; @@ -90,6 +92,10 @@ private static synchronized void initialize(TypeDataBase db) { access = new LinuxSPARCJavaThreadPDAccess(); } else if (cpu.equals("aarch64")) { access = new LinuxAARCH64JavaThreadPDAccess(); + } else if (cpu.equals("mips64")) { + access = new LinuxMIPS64JavaThreadPDAccess(); + } else if (cpu.equals("loongarch64")) { + access = new LinuxLOONGARCH64JavaThreadPDAccess(); } else { try { access = (JavaThreadPDAccess) diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java new file mode 100644 index 00000000000..77c45c2e998 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.linux_loongarch64; + +import java.io.*; +import java.util.*; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.loongarch64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new LOONGARCH64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); + LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); + // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. 
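+    // The lookup takes two hops: JavaThread._osthread yields the OSThread, and the
+    // address of OSThread._thread_id is handed to the debugger, which maps it back to a ThreadProxy.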
+ // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java new file mode 100644 index 00000000000..a0fd73fa673 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.linux_mips64; + +import java.io.*; +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.mips64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new MIPS64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new MIPS64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); + MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new MIPS64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); +// tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(MIPS64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. 
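+    // The lookup takes two hops: JavaThread._osthread yields the OSThread, and the
+    // address of OSThread._thread_id is handed to the debugger, which maps it back to a ThreadProxy.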
+ // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java new file mode 100644 index 00000000000..0208e6e2241 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; + +/**

Should be able to be used on all loongarch64 platforms we support + (currently Linux/loongarch64) to implement JavaThread's + "currentFrameGuess()" functionality. Input is a LOONGARCH64ThreadContext; + output is SP, FP, and PC for a LOONGARCH64Frame. Instantiation of the + LOONGARCH64Frame is left to the caller, since we may need to subclass + LOONGARCH64Frame to support signal handler frames on Unix platforms.

+ +

Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP + (frame pointer). We repeat this until we either find a complete frame + or run out of stack to look at.
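+    (In this port, LinuxLOONGARCH64JavaThreadPDAccess passes a scan range of
+    GUESS_SCAN_RANGE (128K) to run(), and the walk advances one machine word,
+    VM.getAddressSize(), per step.)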

*/ + +public class LOONGARCH64CurrentFrameGuess { + private LOONGARCH64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") + != null; + + public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame eithe + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + // Bail out + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable EBP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from ESP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new LOONGARCH64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. 
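+                  // (An entry frame with entryFrameIsFirst() true is the bottommost Java frame,
+                  //  so the whole sender chain from (curSP, pc) was walkable; curSP is accepted
+                  //  and FP is deliberately left null in setValues below.)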
+ if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + + /* + // Original algorithm which does not work because SP was + // pointing beyond where it should have: + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. We see whether the PC is in the + // interpreter and take care of that, otherwise we run code + // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. + + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + + // See if we can derive a frame pointer from SP and PC + // NOTE: This is the code duplicated from LOONGARCH64Frame + Address saved_fp = null; + int llink_offset = cb.getLinkOffset(); + if (llink_offset >= 0) { + // Restore base-pointer, since next frame might be an interpreter frame. + Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); + saved_fp = fp_addr.getAddressAt(0); + } + + setValues(sp, saved_fp, pc); + return true; + } + */ + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved ESP and + // EBP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct LOONGARCH64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java new file mode 100644 index 00000000000..fdf0c79c1ac --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the loongarch64 family of CPUs. */ + +public class LOONGARCH64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; + } + + // Java frames + private static final int JAVA_FRAME_LINK_OFFSET = 0; + private static final int JAVA_FRAME_RETURN_ADDR_OFFSET = 1; + private static final int JAVA_FRAME_SENDER_SP_OFFSET = 2; + + // Native frames + private static final int NATIVE_FRAME_LINK_OFFSET = -2; + private static final int NATIVE_FRAME_RETURN_ADDR_OFFSET = -1; + private static final int NATIVE_FRAME_SENDER_SP_OFFSET = 0; + + // Interpreter frames + private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; + private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + + // Entry frames + private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -9; + + // Native frames + private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg fp = new VMReg(22 << 1); + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; + + private LOONGARCH64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if 
(Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. + pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = raw_fp.getAddressAt(1 * VM.getVM().getAddressSize()); + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("LOONGARCH64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + LOONGARCH64Frame frame = new LOONGARCH64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof LOONGARCH64Frame)) { + return false; + } + + LOONGARCH64Frame other = (LOONGARCH64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? "null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. 
+ // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + // FIXME: not applicable in current system + // void patch_pc(Thread* thread, address pc); + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + LOONGARCH64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // On loongarch, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original PC. + if (senderNm.isDeoptEntry(getPC()) || + senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); + } + } + } + + private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = getSenderSP(); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. 
With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. + + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(JAVA_FRAME_LINK_OFFSET)); + + return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(fp, savedFPAddr); + } + + private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // On Intel the return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of EBP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame (or C1?). + Address savedFPAddr = senderSP.addOffsetTo(- JAVA_FRAME_SENDER_SP_OFFSET * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + OopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of EBP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + updateMapWithSavedLink(map, savedFPAddr); + } + + return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + // FIXME + // Check for null ebp? Need to do some tests. + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + if (isJavaFrame()) + return addressOfStackSlot(JAVA_FRAME_LINK_OFFSET).getAddressAt(0); + return addressOfStackSlot(NATIVE_FRAME_LINK_OFFSET).getAddressAt(0); + } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { + if (isJavaFrame()) + return addressOfStackSlot(JAVA_FRAME_RETURN_ADDR_OFFSET); + return addressOfStackSlot(NATIVE_FRAME_RETURN_ADDR_OFFSET); + } + + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + public Address getSenderSP() { + if (isJavaFrame()) + return addressOfStackSlot(JAVA_FRAME_SENDER_SP_OFFSET); + return addressOfStackSlot(NATIVE_FRAME_SENDER_SP_OFFSET); + } + + // return address of param, zero origin index. 
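+  // Parameter idx maps to stack slot NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx
+  // (slot 2 + idx in this frame layout).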
+ public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. + Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // FIXME + //inline int frame::interpreter_frame_monitor_size() { + // return BasicObjectLock::size(); + //} + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + if (getFP() != null) { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } else { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java new file mode 100644 index 00000000000..f7dbbcaacd9 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; + +public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public LOONGARCH64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java new file mode 100644 index 00000000000..021ef523e31 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class LOONGARCH64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected LOONGARCH64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java new file mode 100644 index 00000000000..21259a4d32a --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; + +/**

Should be able to be used on all mips64 platforms we support + (currently Linux/mips64) to implement JavaThread's + "currentFrameGuess()" functionality. Input is a MIPS64ThreadContext; + output is SP, FP, and PC for a MIPS64Frame. Instantiation of the + MIPS64Frame is left to the caller, since we may need to subclass + MIPS64Frame to support signal handler frames on Unix platforms.

+ +

Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP + (frame pointer). We repeat this until we either find a complete frame + or run out of stack to look at.
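+    (In this port, LinuxMIPS64JavaThreadPDAccess passes a scan range of
+    GUESS_SCAN_RANGE (128K) to run(), and the walk advances one machine word,
+    VM.getAddressSize(), per step.)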

*/ + +public class MIPS64CurrentFrameGuess { + private MIPS64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") + != null; + + public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame eithe + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + // Bail out + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable EBP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from ESP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new MIPS64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. 
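+                  // (An entry frame with entryFrameIsFirst() true is the bottommost Java frame,
+                  //  so the whole sender chain from (curSP, pc) was walkable; curSP is accepted
+                  //  and FP is deliberately left null in setValues below.)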
+ if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + + /* + // Original algorithm which does not work because SP was + // pointing beyond where it should have: + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. We see whether the PC is in the + // interpreter and take care of that, otherwise we run code + // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. + + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + + // See if we can derive a frame pointer from SP and PC + // NOTE: This is the code duplicated from MIPS64Frame + Address saved_fp = null; + int llink_offset = cb.getLinkOffset(); + if (llink_offset >= 0) { + // Restore base-pointer, since next frame might be an interpreter frame. + Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); + saved_fp = fp_addr.getAddressAt(0); + } + + setValues(sp, saved_fp, pc); + return true; + } + */ + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved ESP and + // EBP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct MIPS64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java new file mode 100644 index 00000000000..0cc5cf4e7ca --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the mips64 family of CPUs. */ + +public class MIPS64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; + } + + // All frames + private static final int LINK_OFFSET = 0; + private static final int RETURN_ADDR_OFFSET = 1; + private static final int SENDER_SP_OFFSET = 2; + + // Interpreter frames + private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only + private static int INTERPRETER_FRAME_CACHE_OFFSET; + private static int INTERPRETER_FRAME_LOCALS_OFFSET; + private static int INTERPRETER_FRAME_BCX_OFFSET; + private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + + // Entry frames + private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; + + // Native frames + private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg rbp; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; + INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + + ENTRY_FRAME_CALL_WRAPPER_OFFSET = db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); + if (VM.getVM().getAddressSize() == 4) { + rbp = new VMReg(5); + } else { + rbp = new VMReg(5 << 1); + } + } + + + // an additional field beyond sp and pc: + Address raw_fp; // 
frame pointer + private Address raw_unextendedSP; + + private MIPS64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. + pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("MIPS64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public MIPS64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("MIPS64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + MIPS64Frame frame = new MIPS64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof MIPS64Frame)) { + return false; + } + + MIPS64Frame other = (MIPS64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? 
"null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet (should be done for Solaris/MIPS64) + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + // FIXME: not applicable in current system + // void patch_pc(Thread* thread, address pc); + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. 
+ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(MIPS64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + MIPS64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // On mips64, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original PC. + if (senderNm.isDeoptEntry(getPC()) || + senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); + } + } + } + + private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = addressOfStackSlot(SENDER_SP_OFFSET); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. + + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + + return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(rbp, savedFPAddr); + } + + private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // On Intel the return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of EBP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame (or C1?). 
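+    // Recap of the reconstruction performed here (offsets in machine words):
+    //   senderSP    = unextendedSP + cb.getFrameSize()
+    //   senderPC    = *(senderSP - 1 word)                (return-address slot)
+    //   savedFPAddr = senderSP - SENDER_SP_OFFSET words   (= senderSP - 2 words)
+    // The word at savedFPAddr is only a real frame pointer when the sender is
+    // an interpreter (or possibly C1) frame, as noted above.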
+ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + OopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of EBP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + updateMapWithSavedLink(map, savedFPAddr); + } + + return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + // FIXME + // Check for null ebp? Need to do some tests. + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); + } + + // FIXME: not implementable yet + //inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + // return address of param, zero origin index. + public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. 
+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // FIXME + //inline int frame::interpreter_frame_monitor_size() { + // return BasicObjectLock::size(); + //} + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + if (getFP() != null) { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } else { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java new file mode 100644 index 00000000000..81fcb5b5689 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; + +public class MIPS64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public MIPS64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java new file mode 100644 index 00000000000..648503792d9 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class MIPS64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected MIPS64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + MIPS64RegisterMap retval = new MIPS64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java index aa692578665..9c97d09bc34 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java @@ -22,6 +22,13 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. 
These + * modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + * + */ + package sun.jvm.hotspot.utilities; /** Provides canonicalized OS and CPU information for the rest of the @@ -65,6 +72,10 @@ public static String getCPU() throws UnsupportedPlatformException { return cpu; } else if (cpu.equals("aarch64")) { return cpu; + } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { + return "mips64"; + } else if (cpu.equals("loongarch64")) { + return "loongarch64"; } else { try { Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed"); diff --git a/hotspot/make/defs.make b/hotspot/make/defs.make index a3573da56f3..6e93182c928 100644 --- a/hotspot/make/defs.make +++ b/hotspot/make/defs.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # The common definitions for hotspot builds. # Optionally include SPEC file generated by configure. @@ -285,7 +291,7 @@ ifneq ($(OSNAME),windows) # Use uname output for SRCARCH, but deal with platform differences. If ARCH # is not explicitly listed below, it is treated as x86. - SRCARCH ?= $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 ppc ppc64 ppc64le zero aarch64,$(ARCH))) + SRCARCH ?= $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 ppc ppc64 ppc64le zero aarch64 mips64 loongarch64,$(ARCH))) ARCH/ = x86 ARCH/sparc = sparc ARCH/sparc64= sparc @@ -295,6 +301,10 @@ ifneq ($(OSNAME),windows) ARCH/ppc64 = ppc ARCH/ppc64le= ppc ARCH/ppc = ppc + ARCH/mips64 = mips + ARCH/mips64el = mips + ARCH/loongarch64 = loongarch + ARCH/loongarch = loongarch ARCH/zero = zero ARCH/aarch64 = aarch64 @@ -317,6 +327,20 @@ ifneq ($(OSNAME),windows) BUILDARCH = ppc64 endif endif + ifeq ($(BUILDARCH), mips) + ifdef LP64 +# ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) +# BUILDARCH = mips64el +# else + BUILDARCH = mips64 +# endif + endif + endif + ifeq ($(BUILDARCH), loongarch) + ifdef LP64 + BUILDARCH = loongarch64 + endif + endif # LIBARCH is 1:1 mapping from BUILDARCH, except for ARCH=ppc64le ifeq ($(ARCH),ppc64le) @@ -332,9 +356,18 @@ ifneq ($(OSNAME),windows) LIBARCH/sparcv9 = sparcv9 LIBARCH/ia64 = ia64 LIBARCH/ppc64 = ppc64 + LIBARCH/loongarch = loongarch64 LIBARCH/zero = $(ZERO_LIBARCH) - LP64_ARCH += sparcv9 amd64 ia64 ppc64 aarch64 zero + ifeq ($(LIBARCH), mips64) + ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) + LIBARCH = mips64el + else + LIBARCH = mips64 + endif + endif + + LP64_ARCH += sparcv9 amd64 ia64 ppc64 aarch64 mips64 mips64el loongarch64 zero endif # Required make macro settings for all platforms diff --git a/hotspot/make/linux/Makefile b/hotspot/make/linux/Makefile index e8f2010412f..5aff01e87d9 100644 --- a/hotspot/make/linux/Makefile +++ b/hotspot/make/linux/Makefile @@ -74,6 +74,10 @@ ifneq (,$(findstring $(ARCH), ppc ppc64)) FORCE_TIERED=0 endif endif +# C1 is not ported on mips64, so we cannot build a tiered VM: +ifeq (mips64, $(findstring mips64, $(ARCH))) + FORCE_TIERED=0 +endif ifdef LP64 ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","") diff --git a/hotspot/make/linux/makefiles/defs.make b/hotspot/make/linux/makefiles/defs.make index ec414639d20..9ade73ab340 100644 --- a/hotspot/make/linux/makefiles/defs.make +++ b/hotspot/make/linux/makefiles/defs.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. 
These +# modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # The common definitions for hotspot linux builds. # Include the top level defs.make under make directory instead of this one. # This file is included into make/defs.make. @@ -39,6 +45,18 @@ ifndef ARCH ARCH := ppc64 endif endif +ifeq ($(ARCH), mips64el) + ARCH=mips64 +endif +ifeq ($(LP64), 1) + ifeq ($(ARCH), mips) + ARCH=mips64 + endif +endif + +ifeq ($(ARCH), loongarch) + ARCH=loongarch64 +endif PATH_SEP ?= : @@ -83,6 +101,36 @@ ifneq (,$(findstring $(ARCH), sparc)) HS_ARCH = sparc endif +# mips +ifeq ($(ARCH), mips64) + ifeq ($(ARCH_DATA_MODEL), 64) + ARCH_DATA_MODEL = 64 + MAKE_ARGS += LP64=1 + PLATFORM = linux-mips64 + VM_PLATFORM = linux_mips64 + else + ARCH_DATA_MODEL = 32 + PLATFORM = linux-mips32 + VM_PLATFORM = linux_mips32 + endif + HS_ARCH = mips +endif + +# loongarch +ifeq ($(ARCH), loongarch64) + ifeq ($(ARCH_DATA_MODEL), 64) + ARCH_DATA_MODEL = 64 + MAKE_ARGS += LP64=1 + PLATFORM = linux-loongarch64 + VM_PLATFORM = linux_loongarch64 + else + ARCH_DATA_MODEL = 32 + PLATFORM = linux-loongarch32 + VM_PLATFORM = linux_loongarch32 + endif + HS_ARCH = loongarch +endif + # i686/i586 and amd64/x86_64 ifneq (,$(findstring $(ARCH), amd64 x86_64 i686 i586)) ifeq ($(ARCH_DATA_MODEL), 64) @@ -311,16 +359,24 @@ ADD_SA_BINARIES/sparc = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ $(EXPORT_LIB_DIR)/sa-jdi.jar ADD_SA_BINARIES/aarch64 = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ $(EXPORT_LIB_DIR)/sa-jdi.jar +ADD_SA_BINARIES/mips = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ + $(EXPORT_LIB_DIR)/sa-jdi.jar +ADD_SA_BINARIES/loongarch = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ + $(EXPORT_LIB_DIR)/sa-jdi.jar ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifneq ($(STRIP_POLICY),no_strip) ifeq ($(ZIP_DEBUGINFO_FILES),1) ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz ADD_SA_BINARIES/aarch64 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/mips += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/loongarch += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz else ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo ADD_SA_BINARIES/aarch64 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/mips += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/loongarch += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo endif endif endif diff --git a/hotspot/make/linux/makefiles/gcc.make b/hotspot/make/linux/makefiles/gcc.make index 7dde7f0963e..94c6d1d0154 100644 --- a/hotspot/make/linux/makefiles/gcc.make +++ b/hotspot/make/linux/makefiles/gcc.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + #------------------------------------------------------------------------ # CC, CXX & AS @@ -177,6 +183,9 @@ ARCHFLAG/aarch64 = ARCHFLAG/ia64 = ARCHFLAG/sparc = -m32 -mcpu=v9 ARCHFLAG/sparcv9 = -m64 -mcpu=v9 +ARCHFLAG/mips64 = -mabi=64 +#ARCHFLAG/loongarch64 = -lp64 +ARCHFLAG/loongarch64 = ARCHFLAG/zero = $(ZERO_ARCHFLAG) ARCHFLAG/ppc64 = -m64 @@ -202,7 +211,7 @@ else endif # Compiler warnings are treated as errors -WARNINGS_ARE_ERRORS = -Werror +#WARNINGS_ARE_ERRORS = -Werror ifeq ($(USE_CLANG), true) # However we need to clean the code up before we can unrestrictedly enable this option with Clang diff --git a/hotspot/make/linux/makefiles/loongarch64.make b/hotspot/make/linux/makefiles/loongarch64.make new file mode 100644 index 00000000000..9e3cdb6f23a --- /dev/null +++ b/hotspot/make/linux/makefiles/loongarch64.make @@ -0,0 +1,43 @@ +# +# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# + +# Not included in includeDB because it has no dependencies +Obj_Files += linux_loongarch.o + +# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized +OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) +# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized +OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) +# Must also specify if CPU is little endian +CFLAGS += -DVM_LITTLE_ENDIAN + +CFLAGS += -DSICORTEX_ERRATA + +CFLAGS += -D_LP64=1 + +# The serviceability agent relies on frame pointer (%rbp) to walk thread stack +CFLAGS += -fno-omit-frame-pointer + +OPT_CFLAGS/compactingPermGenGen.o = -O1 diff --git a/hotspot/make/linux/makefiles/mips64.make b/hotspot/make/linux/makefiles/mips64.make new file mode 100644 index 00000000000..d9af3b13ab2 --- /dev/null +++ b/hotspot/make/linux/makefiles/mips64.make @@ -0,0 +1,43 @@ +# +# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# + +# Not included in includeDB because it has no dependencies +Obj_Files += linux_mips.o + +# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized +OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) +# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized +OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) +# Must also specify if CPU is little endian +CFLAGS += -DVM_LITTLE_ENDIAN + +CFLAGS += -DSICORTEX_ERRATA + +CFLAGS += -D_LP64=1 + +# The serviceability agent relies on frame pointer (%rbp) to walk thread stack +CFLAGS += -fno-omit-frame-pointer + +OPT_CFLAGS/compactingPermGenGen.o = -O1 diff --git a/hotspot/make/linux/makefiles/sa.make b/hotspot/make/linux/makefiles/sa.make index cdcb16a1a3f..34c71bd666c 100644 --- a/hotspot/make/linux/makefiles/sa.make +++ b/hotspot/make/linux/makefiles/sa.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # This makefile (sa.make) is included from the sa.make in the # build directories. @@ -109,6 +115,8 @@ $(GENERATED)/sa-jdi.jar:: $(AGENT_FILES) $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.mips64.MIPS64ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.loongarch64.LOONGARCH64ThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.sparc.SPARCThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.asm.Disassembler diff --git a/hotspot/make/linux/makefiles/saproc.make b/hotspot/make/linux/makefiles/saproc.make index ffc0ec5ce5b..c04a6765df7 100644 --- a/hotspot/make/linux/makefiles/saproc.make +++ b/hotspot/make/linux/makefiles/saproc.make @@ -21,6 +21,13 @@ # questions. # # + +# +# This file has been modified by Loongson Technology in 2019. These +# modifications are Copyright (c) 2018, 2019, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + include $(GAMMADIR)/make/defs.make include $(GAMMADIR)/make/altsrc.make @@ -81,7 +88,12 @@ endif SA_LFLAGS = $(MAPFLAG:FILENAME=$(SAMAPFILE)) $(LDFLAGS_HASH_STYLE) \ $(LDFLAGS_NO_EXEC_STACK) $(EXTRA_LDFLAGS) +ifneq (mips64, $(findstring mips64, $(BUILDARCH))) SAARCH ?= $(BUILDARCH) +else +#If -Dmips64 is used, mips64 would be conflict with "struct mips64_watch_regs mips64" in /usr/include/asm/ptrace.h. +SAARCH ?= mips +endif $(LIBSAPROC): $(SASRCFILES) $(SAMAPFILE) $(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \ diff --git a/hotspot/make/linux/makefiles/sparcWorks.make b/hotspot/make/linux/makefiles/sparcWorks.make index e39116023c5..dbc2ace8257 100644 --- a/hotspot/make/linux/makefiles/sparcWorks.make +++ b/hotspot/make/linux/makefiles/sparcWorks.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2015. These +# modifications are Copyright (c) 2015 Loongson Technology, and are made +# available on the same license terms set forth above. +# + #------------------------------------------------------------------------ # CC, CXX & AS @@ -38,6 +44,7 @@ endif ARCHFLAG = $(ARCHFLAG/$(BUILDARCH)) ARCHFLAG/i486 = -m32 ARCHFLAG/amd64 = -m64 +ARCHFLAG/mips64 = -m64 CFLAGS += $(ARCHFLAG) AOUT_FLAGS += $(ARCHFLAG) diff --git a/hotspot/make/linux/makefiles/vm.make b/hotspot/make/linux/makefiles/vm.make index 04b7c202873..5e428538a0f 100644 --- a/hotspot/make/linux/makefiles/vm.make +++ b/hotspot/make/linux/makefiles/vm.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # Rules to build JVM and related libraries, included from vm.make in the build # directory. @@ -99,9 +105,22 @@ CXXFLAGS = \ ${HS_LIB_ARCH} \ ${VM_DISTRO} +ifeq ($(MIPS_ABI),n32) + CXXFLAGS += -DN32 +else + ifeq ($(MIPS_ABI),n64) + CXXFLAGS += -DN64 + endif +endif # This is VERY important! The version define must only be supplied to vm_version.o # If not, ccache will not re-use the cache at all, since the version string might contain # a time and date. 
+ifdef LOONGSON_RUNTIME_NAME + LOONGSON_VM_INFO = -DLOONGSON_RUNTIME_NAME="\"$(LOONGSON_RUNTIME_NAME)\"" +else + LOONGSON_VM_INFO = -DLOONGSON_RUNTIME_NAME="\"\"" +endif +CXXFLAGS/vmError.o += ${LOONGSON_VM_INFO} CXXFLAGS/vm_version.o += ${JRE_VERSION} ${VERSION_CFLAGS} CXXFLAGS/arguments.o += ${VERSION_CFLAGS} @@ -211,6 +230,15 @@ endif ifeq ($(Platform_arch_model), x86_64) Src_Files_EXCLUDE += \*x86_32\* endif +ifeq ($(Platform_arch_model), mips_32) +Src_Files_EXCLUDE += \*mips_64\* +endif +ifeq ($(Platform_arch_model), mips_64) +Src_Files_EXCLUDE += \*mips_32\* +endif +ifeq ($(Platform_arch_model), loongarch_64) +Src_Files_EXCLUDE += \*loongarch_32\* +endif # Alternate vm.make # This has to be included here to allow changes to the source diff --git a/hotspot/make/linux/platform_loongarch64 b/hotspot/make/linux/platform_loongarch64 new file mode 100644 index 00000000000..d704cf389ae --- /dev/null +++ b/hotspot/make/linux/platform_loongarch64 @@ -0,0 +1,17 @@ +os_family = linux + +arch = loongarch + +arch_model = loongarch_64 + +os_arch = linux_loongarch + +os_arch_model = linux_loongarch_64 + +lib_arch = loongarch64 + +compiler = gcc + +gnu_dis_arch = loongarch64 + +sysdefs = -DLINUX -D_GNU_SOURCE -DLOONGARCH64 diff --git a/hotspot/make/linux/platform_mips64 b/hotspot/make/linux/platform_mips64 new file mode 100644 index 00000000000..c283671f828 --- /dev/null +++ b/hotspot/make/linux/platform_mips64 @@ -0,0 +1,17 @@ +os_family = linux + +arch = mips + +arch_model = mips_64 + +os_arch = linux_mips + +os_arch_model = linux_mips_64 + +lib_arch = mips64 + +compiler = gcc + +gnu_dis_arch = mips64 + +sysdefs = -DLINUX -D_GNU_SOURCE -DMIPS64 diff --git a/hotspot/make/sa.files b/hotspot/make/sa.files index d6e728a9a8c..43b08e3ad19 100644 --- a/hotspot/make/sa.files +++ b/hotspot/make/sa.files @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # This filelist macro is included in platform specific sa.make # included all packages/*.java. package list can be generated by # $(GAMMADIR)/agent/make/build-pkglist. 
@@ -52,14 +58,20 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/cdbg/basic/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/dummy/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/mips64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/loongarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/sparc/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/mips64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/loongarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/elf/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/aarch64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/mips64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/loongarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/*.java \ @@ -94,8 +106,12 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_aarch64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_mips64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_loongarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_sparc/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/mips64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/loongarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/posix/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/solaris_amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/solaris_sparc/*.java \ diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp index 35d34a08eaa..3b8cf4a11d9 100644 --- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp @@ -1177,7 +1177,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { } } - +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { + ShouldNotReachHere(); +} void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { LIR_Opr src = op->in_opr(); @@ -1242,7 +1244,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { } case Bytecodes::_d2l: { - Register tmp = op->tmp1()->as_register(); + Register tmp = op->tmp()->as_register(); __ clear_fpsr(); __ fcvtzd(dest->as_register_lo(), src->as_double_reg()); __ get_fpsr(tmp); @@ -1253,7 +1255,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { } case Bytecodes::_f2i: { - Register tmp = op->tmp1()->as_register(); + Register tmp = op->tmp()->as_register(); __ clear_fpsr(); __ fcvtzsw(dest->as_register(), src->as_float_reg()); __ get_fpsr(tmp); @@ -1264,7 +1266,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { } case Bytecodes::_f2l: { - Register tmp = op->tmp1()->as_register(); + Register tmp = op->tmp()->as_register(); __ clear_fpsr(); __ fcvtzs(dest->as_register_lo(), src->as_float_reg()); __ get_fpsr(tmp); @@ -1275,7 +1277,7 @@ void 
LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { } case Bytecodes::_d2i: { - Register tmp = op->tmp1()->as_register(); + Register tmp = op->tmp()->as_register(); __ clear_fpsr(); __ fcvtzdw(dest->as_register(), src->as_double_reg()); __ get_fpsr(tmp); @@ -1731,6 +1733,11 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); } +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, + LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + ShouldNotReachHere(); +} + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp index 120dd1a7dfa..6a3289022dd 100644 --- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp @@ -277,18 +277,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ store(reg, addr); } -void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { +template +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { LIR_Opr reg = new_register(T_INT); __ load(generate_address(base, disp, T_INT), reg, info); - __ cmp(condition, reg, LIR_OprFact::intConst(c)); + __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); } -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +template +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { LIR_Opr reg1 = new_register(T_INT); __ load(generate_address(base, disp, type), reg1, info); - __ cmp(condition, reg, reg1); + __ cmp_branch(condition, reg, reg1, type, tgt); } +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp new file mode 100644 index 00000000000..2996ef7aa70 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp @@ -0,0 +1,855 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Implementation of AddressLiteral + +AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { + _is_lval = false; + _target = target; + _rspec = rspec_from_rtype(rtype, target); +} + +// Implementation of Address + +Address Address::make_array(ArrayAddress adr) { + AddressLiteral base = adr.base(); + Address index = adr.index(); + assert(index._disp == 0, "must not have disp"); // maybe it can? 
+ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); + array._rspec = base._rspec; + return array; +} + +// exceedingly dangerous constructor +Address::Address(address loc, RelocationHolder spec) { + _base = noreg; + _index = noreg; + _scale = no_scale; + _disp = (intptr_t) loc; + _rspec = spec; +} + + +int Assembler::is_int_mask(int x) { + int xx = x; + int count = 0; + + while (x != 0) { + x &= (x - 1); + count++; + } + + if ((1<> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_b(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_b(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_b(dst, base, AT); + } + } +} + +void Assembler::ld_bu(Register rd, Address src) { + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_bu(dst, base, index); + } else { + add_d(AT, base, index); + ld_bu(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_bu(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_bu(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_bu(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_bu(dst, base, AT); + } + } +} + +void Assembler::ld_d(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_d(dst, base, index); + } else { + add_d(AT, base, index); + ld_d(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_d(dst, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + ldptr_d(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_d(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_d(dst, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + ldptr_d(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_d(dst, base, AT); + } + } +} + +void Assembler::ld_h(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_h(dst, base, index); + } else { + add_d(AT, base, index); + ld_h(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_h(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); 
+ } + ldx_h(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_h(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_h(dst, base, AT); + } + } +} + +void Assembler::ld_hu(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_hu(dst, base, index); + } else { + add_d(AT, base, index); + ld_hu(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_hu(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_hu(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_hu(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_hu(dst, base, AT); + } + } +} + +void Assembler::ll_w(Register rd, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ll_w(rd, src.base(), src.disp()); +} + +void Assembler::ll_d(Register rd, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ll_d(rd, src.base(), src.disp()); +} + +void Assembler::ld_w(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_w(dst, base, index); + } else { + add_d(AT, base, index); + ld_w(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_w(dst, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + ldptr_w(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_w(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_w(dst, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + ldptr_w(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_w(dst, base, AT); + } + } +} + +void Assembler::ld_wu(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_wu(dst, base, index); + } else { + add_d(AT, base, index); + ld_wu(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_wu(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_wu(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_wu(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_wu(dst, base, AT); + } + } +} + +void Assembler::st_b(Register rd, Address dst) { + Register src = rd; + Register 
base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_b(src, base, index); + } else { + add_d(AT, base, index); + st_b(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_b(src, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_b(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_b(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_b(src, base, AT); + } + } +} + +void Assembler::sc_w(Register rd, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sc_w(rd, dst.base(), dst.disp()); +} + +void Assembler::sc_d(Register rd, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sc_d(rd, dst.base(), dst.disp()); +} + +void Assembler::st_d(Register rd, Address dst) { + Register src = rd; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_d(src, base, index); + } else { + add_d(AT, base, index); + st_d(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_d(src, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + stptr_d(src, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_d(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_d(src, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + stptr_d(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_d(src, base, AT); + } + } +} + +void Assembler::st_h(Register rd, Address dst) { + Register src = rd; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_h(src, base, index); + } else { + add_d(AT, base, index); + st_h(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_h(src, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_h(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_h(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_h(src, base, AT); + } + } +} + +void Assembler::st_w(Register rd, Address dst) { + Register src = rd; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = 
dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_w(src, base, index); + } else { + add_d(AT, base, index); + st_w(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_w(src, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + stptr_w(src, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_w(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_w(src, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + stptr_w(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_w(src, base, AT); + } + } +} + +void Assembler::fld_s(FloatRegister fd, Address src) { + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fldx_s(fd, base, index); + } else { + add_d(AT, base, index); + fld_s(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fld_s(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fldx_s(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fld_s(fd, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fldx_s(fd, base, AT); + } + } +} + +void Assembler::fld_d(FloatRegister fd, Address src) { + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fldx_d(fd, base, index); + } else { + add_d(AT, base, index); + fld_d(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fld_d(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fldx_d(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fld_d(fd, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fldx_d(fd, base, AT); + } + } +} + +void Assembler::fst_s(FloatRegister fd, Address dst) { + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fstx_s(fd, base, index); + } else { + add_d(AT, base, index); + fst_s(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fst_s(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fstx_s(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fst_s(fd, base, disp); + } else { + lu12i_w(AT, 
split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fstx_s(fd, base, AT); + } + } +} + +void Assembler::fst_d(FloatRegister fd, Address dst) { + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fstx_d(fd, base, index); + } else { + add_d(AT, base, index); + fst_d(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fst_d(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fstx_d(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fst_d(fd, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fstx_d(fd, base, AT); + } + } +} diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp new file mode 100644 index 00000000000..46b57cfe761 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp @@ -0,0 +1,2810 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP + +#include "asm/register.hpp" + +class BiasedLockingCounters; + + +// Note: A register location is represented via a Register, not +// via an address for efficiency & simplicity reasons. 
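
Reviewer note (not part of the patch): every Address-based ld_*/st_*/fld_*/fst_* overload in assembler_loongarch.cpp above follows the same expansion. A displacement that passes is_simm(disp, 12) is encoded directly; for word/dword accesses a displacement that passes the is_simm(disp, 16) && !(disp & 3) check uses the ldptr_*/stptr_* form; anything larger is first materialized into the scratch register AT with lu12i_w + ori and the access is then issued through the indexed ldx_*/stx_* form. The standalone sketch below is illustrative only: split_low20/split_low12/is_simm are reimplemented locally to mirror their declarations later in this header, materialize_disp is a hypothetical helper name, and the behaviour of lu12i_w (20-bit operand placed in bits [31:12], result sign-extended) is an assumption stated here rather than something this sketch verifies against hardware.

    #include <cassert>
    #include <cstdio>

    // Local reimplementations for illustration; they mirror the helpers
    // declared later in assembler_loongarch.hpp.
    static int split_low20(int x) { return x & 0xfffff; } // bits [31:12] of disp
    static int split_low12(int x) { return x & 0xfff;   } // bits [11:0]  of disp
    static bool is_simm(int x, int nbits) {
      const int min      = -(1 << (nbits - 1));
      const int maxplus1 =  (1 << (nbits - 1));
      return min <= x && x < maxplus1;
    }

    // Value the scratch register AT would hold after
    //   lu12i_w(AT, split_low20(disp >> 12));
    //   if (split_low12(disp)) ori(AT, AT, split_low12(disp));
    // assuming lu12i_w puts its 20-bit operand in bits [31:12].
    static int materialize_disp(int disp) {
      unsigned hi = (unsigned)split_low20(disp >> 12) << 12;
      unsigned lo = (unsigned)split_low12(disp);
      return (int)(hi | lo);
    }

    int main() {
      const int disps[] = { 0, 8, 2047, 2048, -2048, -2049, 0x12345678, -0x12345678 };
      for (int d : disps) {
        if (is_simm(d, 12)) {
          printf("disp %d fits simm12 -> single ld_*/st_* instruction\n", d);
        } else {
          assert(materialize_disp(d) == d);  // the hi20/lo12 split loses nothing
          printf("disp %d -> lu12i_w(0x%x) [+ ori(0x%x)] then ldx_*/stx_*\n",
                 d, split_low20(d >> 12), split_low12(d));
        }
      }
      return 0;
    }

The round trip holds for negative displacements as well, because disp >> 12 is an arithmetic shift and the re-assembled value keeps the same low 32 bits; this is why the emitters above never need a separate sign-fixup step.
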
+ +class ArrayAddress; + +class Address VALUE_OBJ_CLASS_SPEC { + public: + enum ScaleFactor { + no_scale = -1, + times_1 = 0, + times_2 = 1, + times_4 = 2, + times_8 = 3, + times_ptr = times_8 + }; + static ScaleFactor times(int size) { + assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); + if (size == 8) return times_8; + if (size == 4) return times_4; + if (size == 2) return times_2; + return times_1; + } + + private: + Register _base; + Register _index; + ScaleFactor _scale; + int _disp; + RelocationHolder _rspec; + + // Easily misused constructors make them private + Address(address loc, RelocationHolder spec); + Address(int disp, address loc, relocInfo::relocType rtype); + Address(int disp, address loc, RelocationHolder spec); + + public: + + // creation + Address() + : _base(noreg), + _index(noreg), + _scale(no_scale), + _disp(0) { + } + + // No default displacement otherwise Register can be implicitly + // converted to 0(Register) which is quite a different animal. + + Address(Register base, int disp = 0) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(disp) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, int disp = 0) + : _base (base), + _index(index), + _scale(scale), + _disp (disp) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } + + // The following two overloads are used in connection with the + // ByteSize type (see sizes.hpp). They simplify the use of + // ByteSize'd arguments in assembly code. Note that their equivalent + // for the optimized build are the member functions with int disp + // argument since ByteSize is mapped to an int type in that case. + // + // Note: DO NOT introduce similar overloaded functions for WordSize + // arguments as in the optimized mode, both ByteSize and WordSize + // are mapped to the same type and thus the compiler cannot make a + // distinction anymore (=> compiler errors). + +#ifdef ASSERT + Address(Register base, ByteSize disp) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(in_bytes(disp)) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, ByteSize disp) + : _base(base), + _index(index), + _scale(scale), + _disp(in_bytes(disp)) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } +#endif // ASSERT + + // accessors + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + int disp() const { return _disp; } + + static Address make_array(ArrayAddress); + + friend class Assembler; + friend class MacroAssembler; + friend class LIR_Assembler; // base/index/scale/disp +}; + +// Calling convention +class Argument VALUE_OBJ_CLASS_SPEC { + public: + enum { + n_register_parameters = 8, // 8 integer registers used to pass parameters + n_float_register_parameters = 8 // 8 float registers used to pass parameters + }; +}; + +// +// AddressLiteral has been split out from Address because operands of this type +// need to be treated specially on 32bit vs. 64bit platforms. 
By splitting it out +// the few instructions that need to deal with address literals are unique and the +// MacroAssembler does not have to implement every instruction in the Assembler +// in order to search for address literals that may need special handling depending +// on the instruction and the platform. As small step on the way to merging i486/amd64 +// directories. +// +class AddressLiteral VALUE_OBJ_CLASS_SPEC { + friend class ArrayAddress; + RelocationHolder _rspec; + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of the item. + // We provide a special factory for making those lvals. + bool _is_lval; + + // If the target is far we'll need to load the ea of this to + // a register to reach it. Otherwise if near we can do rip + // relative addressing. + + address _target; + + protected: + // creation + AddressLiteral() + : _is_lval(false), + _target(NULL) + {} + + public: + + + AddressLiteral(address target, relocInfo::relocType rtype); + + AddressLiteral(address target, RelocationHolder const& rspec) + : _rspec(rspec), + _is_lval(false), + _target(target) + {} + // 32-bit complains about a multiple declaration for int*. + AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) + : _target((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral addr() { + AddressLiteral ret = *this; + ret._is_lval = true; + return ret; + } + + + private: + + address target() { return _target; } + bool is_lval() { return _is_lval; } + + relocInfo::relocType reloc() const { return _rspec.type(); } + const RelocationHolder& rspec() const { return _rspec; } + + friend class Assembler; + friend class MacroAssembler; + friend class Address; + friend class LIR_Assembler; + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); + case relocInfo::poll_type: + case relocInfo::poll_return_type: + return Relocation::spec_simple(rtype); + case relocInfo::none: + case relocInfo::oop_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. 
+ return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + +}; + +// Convience classes +class RuntimeAddress: public AddressLiteral { + + public: + + RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} + +}; + +class OopAddress: public AddressLiteral { + + public: + + OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} + +}; + +class ExternalAddress: public AddressLiteral { + + public: + + ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} + +}; + +class InternalAddress: public AddressLiteral { + + public: + + InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} + +}; + +// x86 can do array addressing as a single operation since disp can be an absolute +// address amd64 can't. We create a class that expresses the concept but does extra +// magic on amd64 to get the final result + +class ArrayAddress VALUE_OBJ_CLASS_SPEC { + private: + + AddressLiteral _base; + Address _index; + + public: + + ArrayAddress() {}; + ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; + AddressLiteral base() { return _base; } + Address index() { return _index; } + +}; + +// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction +// level ; i.e., what you write is what you get. The Assembler is generating code into +// a CodeBuffer. + +class Assembler : public AbstractAssembler { + friend class AbstractAssembler; // for the non-virtual hack + friend class LIR_Assembler; // as_Address() + friend class StubGenerator; + + public: + // 22-bit opcode, highest 22 bits: bits[31...10] + enum ops22 { + clo_w_op = 0b0000000000000000000100, + clz_w_op = 0b0000000000000000000101, + cto_w_op = 0b0000000000000000000110, + ctz_w_op = 0b0000000000000000000111, + clo_d_op = 0b0000000000000000001000, + clz_d_op = 0b0000000000000000001001, + cto_d_op = 0b0000000000000000001010, + ctz_d_op = 0b0000000000000000001011, + revb_2h_op = 0b0000000000000000001100, + revb_4h_op = 0b0000000000000000001101, + revb_2w_op = 0b0000000000000000001110, + revb_d_op = 0b0000000000000000001111, + revh_2w_op = 0b0000000000000000010000, + revh_d_op = 0b0000000000000000010001, + bitrev_4b_op = 0b0000000000000000010010, + bitrev_8b_op = 0b0000000000000000010011, + bitrev_w_op = 0b0000000000000000010100, + bitrev_d_op = 0b0000000000000000010101, + ext_w_h_op = 0b0000000000000000010110, + ext_w_b_op = 0b0000000000000000010111, + rdtimel_w_op = 0b0000000000000000011000, + rdtimeh_w_op = 0b0000000000000000011001, + rdtime_d_op = 0b0000000000000000011010, + cpucfg_op = 0b0000000000000000011011, + fabs_s_op = 0b0000000100010100000001, + fabs_d_op = 0b0000000100010100000010, + fneg_s_op = 0b0000000100010100000101, + fneg_d_op = 0b0000000100010100000110, + flogb_s_op = 0b0000000100010100001001, + flogb_d_op = 0b0000000100010100001010, + fclass_s_op = 0b0000000100010100001101, + fclass_d_op = 0b0000000100010100001110, + fsqrt_s_op = 0b0000000100010100010001, + fsqrt_d_op = 0b0000000100010100010010, + frecip_s_op = 0b0000000100010100010101, + frecip_d_op = 0b0000000100010100010110, + frsqrt_s_op = 0b0000000100010100011001, + frsqrt_d_op = 0b0000000100010100011010, + fmov_s_op = 0b0000000100010100100101, + fmov_d_op = 0b0000000100010100100110, + movgr2fr_w_op = 0b0000000100010100101001, + movgr2fr_d_op = 0b0000000100010100101010, + movgr2frh_w_op = 0b0000000100010100101011, + movfr2gr_s_op = 0b0000000100010100101101, + 
movfr2gr_d_op = 0b0000000100010100101110, + movfrh2gr_s_op = 0b0000000100010100101111, + movgr2fcsr_op = 0b0000000100010100110000, + movfcsr2gr_op = 0b0000000100010100110010, + movfr2cf_op = 0b0000000100010100110100, + movcf2fr_op = 0b0000000100010100110101, + movgr2cf_op = 0b0000000100010100110110, + movcf2gr_op = 0b0000000100010100110111, + fcvt_s_d_op = 0b0000000100011001000110, + fcvt_d_s_op = 0b0000000100011001001001, + ftintrm_w_s_op = 0b0000000100011010000001, + ftintrm_w_d_op = 0b0000000100011010000010, + ftintrm_l_s_op = 0b0000000100011010001001, + ftintrm_l_d_op = 0b0000000100011010001010, + ftintrp_w_s_op = 0b0000000100011010010001, + ftintrp_w_d_op = 0b0000000100011010010010, + ftintrp_l_s_op = 0b0000000100011010011001, + ftintrp_l_d_op = 0b0000000100011010011010, + ftintrz_w_s_op = 0b0000000100011010100001, + ftintrz_w_d_op = 0b0000000100011010100010, + ftintrz_l_s_op = 0b0000000100011010101001, + ftintrz_l_d_op = 0b0000000100011010101010, + ftintrne_w_s_op = 0b0000000100011010110001, + ftintrne_w_d_op = 0b0000000100011010110010, + ftintrne_l_s_op = 0b0000000100011010111001, + ftintrne_l_d_op = 0b0000000100011010111010, + ftint_w_s_op = 0b0000000100011011000001, + ftint_w_d_op = 0b0000000100011011000010, + ftint_l_s_op = 0b0000000100011011001001, + ftint_l_d_op = 0b0000000100011011001010, + ffint_s_w_op = 0b0000000100011101000100, + ffint_s_l_op = 0b0000000100011101000110, + ffint_d_w_op = 0b0000000100011101001000, + ffint_d_l_op = 0b0000000100011101001010, + frint_s_op = 0b0000000100011110010001, + frint_d_op = 0b0000000100011110010010, + iocsrrd_b_op = 0b0000011001001000000000, + iocsrrd_h_op = 0b0000011001001000000001, + iocsrrd_w_op = 0b0000011001001000000010, + iocsrrd_d_op = 0b0000011001001000000011, + iocsrwr_b_op = 0b0000011001001000000100, + iocsrwr_h_op = 0b0000011001001000000101, + iocsrwr_w_op = 0b0000011001001000000110, + iocsrwr_d_op = 0b0000011001001000000111, + vpcnt_b_op = 0b0111001010011100001000, + vpcnt_h_op = 0b0111001010011100001001, + vpcnt_w_op = 0b0111001010011100001010, + vpcnt_d_op = 0b0111001010011100001011, + vneg_b_op = 0b0111001010011100001100, + vneg_h_op = 0b0111001010011100001101, + vneg_w_op = 0b0111001010011100001110, + vneg_d_op = 0b0111001010011100001111, + vfclass_s_op = 0b0111001010011100110101, + vfclass_d_op = 0b0111001010011100110110, + vfsqrt_s_op = 0b0111001010011100111001, + vfsqrt_d_op = 0b0111001010011100111010, + vfrint_s_op = 0b0111001010011101001101, + vfrint_d_op = 0b0111001010011101001110, + vfrintrm_s_op = 0b0111001010011101010001, + vfrintrm_d_op = 0b0111001010011101010010, + vfrintrp_s_op = 0b0111001010011101010101, + vfrintrp_d_op = 0b0111001010011101010110, + vfrintrz_s_op = 0b0111001010011101011001, + vfrintrz_d_op = 0b0111001010011101011010, + vfrintrne_s_op = 0b0111001010011101011101, + vfrintrne_d_op = 0b0111001010011101011110, + vfcvtl_s_h_op = 0b0111001010011101111010, + vfcvth_s_h_op = 0b0111001010011101111011, + vfcvtl_d_s_op = 0b0111001010011101111100, + vfcvth_d_s_op = 0b0111001010011101111101, + vffint_s_w_op = 0b0111001010011110000000, + vffint_s_wu_op = 0b0111001010011110000001, + vffint_d_l_op = 0b0111001010011110000010, + vffint_d_lu_op = 0b0111001010011110000011, + vffintl_d_w_op = 0b0111001010011110000100, + vffinth_d_w_op = 0b0111001010011110000101, + vftint_w_s_op = 0b0111001010011110001100, + vftint_l_d_op = 0b0111001010011110001101, + vftintrm_w_s_op = 0b0111001010011110001110, + vftintrm_l_d_op = 0b0111001010011110001111, + vftintrp_w_s_op = 0b0111001010011110010000, + vftintrp_l_d_op = 
0b0111001010011110010001, + vftintrz_w_s_op = 0b0111001010011110010010, + vftintrz_l_d_op = 0b0111001010011110010011, + vftintrne_w_s_op = 0b0111001010011110010100, + vftintrne_l_d_op = 0b0111001010011110010101, + vftint_wu_s = 0b0111001010011110010110, + vftint_lu_d = 0b0111001010011110010111, + vftintrz_wu_f = 0b0111001010011110011100, + vftintrz_lu_d = 0b0111001010011110011101, + vftintl_l_s_op = 0b0111001010011110100000, + vftinth_l_s_op = 0b0111001010011110100001, + vftintrml_l_s_op = 0b0111001010011110100010, + vftintrmh_l_s_op = 0b0111001010011110100011, + vftintrpl_l_s_op = 0b0111001010011110100100, + vftintrph_l_s_op = 0b0111001010011110100101, + vftintrzl_l_s_op = 0b0111001010011110100110, + vftintrzh_l_s_op = 0b0111001010011110100111, + vftintrnel_l_s_op = 0b0111001010011110101000, + vftintrneh_l_s_op = 0b0111001010011110101001, + vreplgr2vr_b_op = 0b0111001010011111000000, + vreplgr2vr_h_op = 0b0111001010011111000001, + vreplgr2vr_w_op = 0b0111001010011111000010, + vreplgr2vr_d_op = 0b0111001010011111000011, + xvpcnt_b_op = 0b0111011010011100001000, + xvpcnt_h_op = 0b0111011010011100001001, + xvpcnt_w_op = 0b0111011010011100001010, + xvpcnt_d_op = 0b0111011010011100001011, + xvneg_b_op = 0b0111011010011100001100, + xvneg_h_op = 0b0111011010011100001101, + xvneg_w_op = 0b0111011010011100001110, + xvneg_d_op = 0b0111011010011100001111, + xvfclass_s_op = 0b0111011010011100110101, + xvfclass_d_op = 0b0111011010011100110110, + xvfsqrt_s_op = 0b0111011010011100111001, + xvfsqrt_d_op = 0b0111011010011100111010, + xvfrint_s_op = 0b0111011010011101001101, + xvfrint_d_op = 0b0111011010011101001110, + xvfrintrm_s_op = 0b0111011010011101010001, + xvfrintrm_d_op = 0b0111011010011101010010, + xvfrintrp_s_op = 0b0111011010011101010101, + xvfrintrp_d_op = 0b0111011010011101010110, + xvfrintrz_s_op = 0b0111011010011101011001, + xvfrintrz_d_op = 0b0111011010011101011010, + xvfrintrne_s_op = 0b0111011010011101011101, + xvfrintrne_d_op = 0b0111011010011101011110, + xvfcvtl_s_h_op = 0b0111011010011101111010, + xvfcvth_s_h_op = 0b0111011010011101111011, + xvfcvtl_d_s_op = 0b0111011010011101111100, + xvfcvth_d_s_op = 0b0111011010011101111101, + xvffint_s_w_op = 0b0111011010011110000000, + xvffint_s_wu_op = 0b0111011010011110000001, + xvffint_d_l_op = 0b0111011010011110000010, + xvffint_d_lu_op = 0b0111011010011110000011, + xvffintl_d_w_op = 0b0111011010011110000100, + xvffinth_d_w_op = 0b0111011010011110000101, + xvftint_w_s_op = 0b0111011010011110001100, + xvftint_l_d_op = 0b0111011010011110001101, + xvftintrm_w_s_op = 0b0111011010011110001110, + xvftintrm_l_d_op = 0b0111011010011110001111, + xvftintrp_w_s_op = 0b0111011010011110010000, + xvftintrp_l_d_op = 0b0111011010011110010001, + xvftintrz_w_s_op = 0b0111011010011110010010, + xvftintrz_l_d_op = 0b0111011010011110010011, + xvftintrne_w_s_op = 0b0111011010011110010100, + xvftintrne_l_d_op = 0b0111011010011110010101, + xvftint_wu_s = 0b0111011010011110010110, + xvftint_lu_d = 0b0111011010011110010111, + xvftintrz_wu_f = 0b0111011010011110011100, + xvftintrz_lu_d = 0b0111011010011110011101, + xvftintl_l_s_op = 0b0111011010011110100000, + xvftinth_l_s_op = 0b0111011010011110100001, + xvftintrml_l_s_op = 0b0111011010011110100010, + xvftintrmh_l_s_op = 0b0111011010011110100011, + xvftintrpl_l_s_op = 0b0111011010011110100100, + xvftintrph_l_s_op = 0b0111011010011110100101, + xvftintrzl_l_s_op = 0b0111011010011110100110, + xvftintrzh_l_s_op = 0b0111011010011110100111, + xvftintrnel_l_s_op = 0b0111011010011110101000, + xvftintrneh_l_s_op = 
0b0111011010011110101001, + xvreplgr2vr_b_op = 0b0111011010011111000000, + xvreplgr2vr_h_op = 0b0111011010011111000001, + xvreplgr2vr_w_op = 0b0111011010011111000010, + xvreplgr2vr_d_op = 0b0111011010011111000011, + vext2xv_h_b_op = 0b0111011010011111000100, + vext2xv_w_b_op = 0b0111011010011111000101, + vext2xv_d_b_op = 0b0111011010011111000110, + vext2xv_w_h_op = 0b0111011010011111000111, + vext2xv_d_h_op = 0b0111011010011111001000, + vext2xv_d_w_op = 0b0111011010011111001001, + vext2xv_hu_bu_op = 0b0111011010011111001010, + vext2xv_wu_bu_op = 0b0111011010011111001011, + vext2xv_du_bu_op = 0b0111011010011111001100, + vext2xv_wu_hu_op = 0b0111011010011111001101, + vext2xv_du_hu_op = 0b0111011010011111001110, + vext2xv_du_wu_op = 0b0111011010011111001111, + xvreplve0_b_op = 0b0111011100000111000000, + xvreplve0_h_op = 0b0111011100000111100000, + xvreplve0_w_op = 0b0111011100000111110000, + xvreplve0_d_op = 0b0111011100000111111000, + xvreplve0_q_op = 0b0111011100000111111100, + + unknow_ops22 = 0b1111111111111111111111 + }; + + // 21-bit opcode, highest 21 bits: bits[31...11] + enum ops21 { + vinsgr2vr_d_op = 0b011100101110101111110, + vpickve2gr_d_op = 0b011100101110111111110, + vpickve2gr_du_op = 0b011100101111001111110, + vreplvei_d_op = 0b011100101111011111110, + + unknow_ops21 = 0b111111111111111111111 + }; + + // 20-bit opcode, highest 20 bits: bits[31...12] + enum ops20 { + vinsgr2vr_w_op = 0b01110010111010111110, + vpickve2gr_w_op = 0b01110010111011111110, + vpickve2gr_wu_op = 0b01110010111100111110, + vreplvei_w_op = 0b01110010111101111110, + xvinsgr2vr_d_op = 0b01110110111010111110, + xvpickve2gr_d_op = 0b01110110111011111110, + xvpickve2gr_du_op = 0b01110110111100111110, + xvinsve0_d_op = 0b01110110111111111110, + xvpickve_d_op = 0b01110111000000111110, + + unknow_ops20 = 0b11111111111111111111 + }; + + // 19-bit opcode, highest 19 bits: bits[31...13] + enum ops19 { + vrotri_b_op = 0b0111001010100000001, + vinsgr2vr_h_op = 0b0111001011101011110, + vpickve2gr_h_op = 0b0111001011101111110, + vpickve2gr_hu_op = 0b0111001011110011110, + vreplvei_h_op = 0b0111001011110111110, + vbitclri_b_op = 0b0111001100010000001, + vbitseti_b_op = 0b0111001100010100001, + vbitrevi_b_op = 0b0111001100011000001, + vslli_b_op = 0b0111001100101100001, + vsrli_b_op = 0b0111001100110000001, + vsrai_b_op = 0b0111001100110100001, + xvrotri_b_op = 0b0111011010100000001, + xvinsgr2vr_w_op = 0b0111011011101011110, + xvpickve2gr_w_op = 0b0111011011101111110, + xvpickve2gr_wu_op = 0b0111011011110011110, + xvinsve0_w_op = 0b0111011011111111110, + xvpickve_w_op = 0b0111011100000011110, + xvbitclri_b_op = 0b0111011100010000001, + xvbitseti_b_op = 0b0111011100010100001, + xvbitrevi_b_op = 0b0111011100011000001, + xvslli_b_op = 0b0111011100101100001, + xvsrli_b_op = 0b0111011100110000001, + xvsrai_b_op = 0b0111011100110100001, + + unknow_ops19 = 0b1111111111111111111 + }; + + // 18-bit opcode, highest 18 bits: bits[31...14] + enum ops18 { + vrotri_h_op = 0b011100101010000001, + vinsgr2vr_b_op = 0b011100101110101110, + vpickve2gr_b_op = 0b011100101110111110, + vpickve2gr_bu_op = 0b011100101111001110, + vreplvei_b_op = 0b011100101111011110, + vbitclri_h_op = 0b011100110001000001, + vbitseti_h_op = 0b011100110001010001, + vbitrevi_h_op = 0b011100110001100001, + vslli_h_op = 0b011100110010110001, + vsrli_h_op = 0b011100110011000001, + vsrai_h_op = 0b011100110011010001, + vsrlni_b_h_op = 0b011100110100000001, + xvrotri_h_op = 0b011101101010000001, + xvbitclri_h_op = 0b011101110001000001, + xvbitseti_h_op = 
0b011101110001010001, + xvbitrevi_h_op = 0b011101110001100001, + xvslli_h_op = 0b011101110010110001, + xvsrli_h_op = 0b011101110011000001, + xvsrai_h_op = 0b011101110011010001, + + unknow_ops18 = 0b111111111111111111 + }; + + // 17-bit opcode, highest 17 bits: bits[31...15] + enum ops17 { + asrtle_d_op = 0b00000000000000010, + asrtgt_d_op = 0b00000000000000011, + add_w_op = 0b00000000000100000, + add_d_op = 0b00000000000100001, + sub_w_op = 0b00000000000100010, + sub_d_op = 0b00000000000100011, + slt_op = 0b00000000000100100, + sltu_op = 0b00000000000100101, + maskeqz_op = 0b00000000000100110, + masknez_op = 0b00000000000100111, + nor_op = 0b00000000000101000, + and_op = 0b00000000000101001, + or_op = 0b00000000000101010, + xor_op = 0b00000000000101011, + orn_op = 0b00000000000101100, + andn_op = 0b00000000000101101, + sll_w_op = 0b00000000000101110, + srl_w_op = 0b00000000000101111, + sra_w_op = 0b00000000000110000, + sll_d_op = 0b00000000000110001, + srl_d_op = 0b00000000000110010, + sra_d_op = 0b00000000000110011, + rotr_w_op = 0b00000000000110110, + rotr_d_op = 0b00000000000110111, + mul_w_op = 0b00000000000111000, + mulh_w_op = 0b00000000000111001, + mulh_wu_op = 0b00000000000111010, + mul_d_op = 0b00000000000111011, + mulh_d_op = 0b00000000000111100, + mulh_du_op = 0b00000000000111101, + mulw_d_w_op = 0b00000000000111110, + mulw_d_wu_op = 0b00000000000111111, + div_w_op = 0b00000000001000000, + mod_w_op = 0b00000000001000001, + div_wu_op = 0b00000000001000010, + mod_wu_op = 0b00000000001000011, + div_d_op = 0b00000000001000100, + mod_d_op = 0b00000000001000101, + div_du_op = 0b00000000001000110, + mod_du_op = 0b00000000001000111, + crc_w_b_w_op = 0b00000000001001000, + crc_w_h_w_op = 0b00000000001001001, + crc_w_w_w_op = 0b00000000001001010, + crc_w_d_w_op = 0b00000000001001011, + crcc_w_b_w_op = 0b00000000001001100, + crcc_w_h_w_op = 0b00000000001001101, + crcc_w_w_w_op = 0b00000000001001110, + crcc_w_d_w_op = 0b00000000001001111, + break_op = 0b00000000001010100, + fadd_s_op = 0b00000001000000001, + fadd_d_op = 0b00000001000000010, + fsub_s_op = 0b00000001000000101, + fsub_d_op = 0b00000001000000110, + fmul_s_op = 0b00000001000001001, + fmul_d_op = 0b00000001000001010, + fdiv_s_op = 0b00000001000001101, + fdiv_d_op = 0b00000001000001110, + fmax_s_op = 0b00000001000010001, + fmax_d_op = 0b00000001000010010, + fmin_s_op = 0b00000001000010101, + fmin_d_op = 0b00000001000010110, + fmaxa_s_op = 0b00000001000011001, + fmaxa_d_op = 0b00000001000011010, + fmina_s_op = 0b00000001000011101, + fmina_d_op = 0b00000001000011110, + fscaleb_s_op = 0b00000001000100001, + fscaleb_d_op = 0b00000001000100010, + fcopysign_s_op = 0b00000001000100101, + fcopysign_d_op = 0b00000001000100110, + ldx_b_op = 0b00111000000000000, + ldx_h_op = 0b00111000000001000, + ldx_w_op = 0b00111000000010000, + ldx_d_op = 0b00111000000011000, + stx_b_op = 0b00111000000100000, + stx_h_op = 0b00111000000101000, + stx_w_op = 0b00111000000110000, + stx_d_op = 0b00111000000111000, + ldx_bu_op = 0b00111000001000000, + ldx_hu_op = 0b00111000001001000, + ldx_wu_op = 0b00111000001010000, + fldx_s_op = 0b00111000001100000, + fldx_d_op = 0b00111000001101000, + fstx_s_op = 0b00111000001110000, + fstx_d_op = 0b00111000001111000, + vldx_op = 0b00111000010000000, + vstx_op = 0b00111000010001000, + xvldx_op = 0b00111000010010000, + xvstx_op = 0b00111000010011000, + amswap_w_op = 0b00111000011000000, + amswap_d_op = 0b00111000011000001, + amadd_w_op = 0b00111000011000010, + amadd_d_op = 0b00111000011000011, + amand_w_op = 
0b00111000011000100, + amand_d_op = 0b00111000011000101, + amor_w_op = 0b00111000011000110, + amor_d_op = 0b00111000011000111, + amxor_w_op = 0b00111000011001000, + amxor_d_op = 0b00111000011001001, + ammax_w_op = 0b00111000011001010, + ammax_d_op = 0b00111000011001011, + ammin_w_op = 0b00111000011001100, + ammin_d_op = 0b00111000011001101, + ammax_wu_op = 0b00111000011001110, + ammax_du_op = 0b00111000011001111, + ammin_wu_op = 0b00111000011010000, + ammin_du_op = 0b00111000011010001, + amswap_db_w_op = 0b00111000011010010, + amswap_db_d_op = 0b00111000011010011, + amadd_db_w_op = 0b00111000011010100, + amadd_db_d_op = 0b00111000011010101, + amand_db_w_op = 0b00111000011010110, + amand_db_d_op = 0b00111000011010111, + amor_db_w_op = 0b00111000011011000, + amor_db_d_op = 0b00111000011011001, + amxor_db_w_op = 0b00111000011011010, + amxor_db_d_op = 0b00111000011011011, + ammax_db_w_op = 0b00111000011011100, + ammax_db_d_op = 0b00111000011011101, + ammin_db_w_op = 0b00111000011011110, + ammin_db_d_op = 0b00111000011011111, + ammax_db_wu_op = 0b00111000011100000, + ammax_db_du_op = 0b00111000011100001, + ammin_db_wu_op = 0b00111000011100010, + ammin_db_du_op = 0b00111000011100011, + dbar_op = 0b00111000011100100, + ibar_op = 0b00111000011100101, + fldgt_s_op = 0b00111000011101000, + fldgt_d_op = 0b00111000011101001, + fldle_s_op = 0b00111000011101010, + fldle_d_op = 0b00111000011101011, + fstgt_s_op = 0b00111000011101100, + fstgt_d_op = 0b00111000011101101, + fstle_s_op = 0b00111000011101110, + fstle_d_op = 0b00111000011101111, + ldgt_b_op = 0b00111000011110000, + ldgt_h_op = 0b00111000011110001, + ldgt_w_op = 0b00111000011110010, + ldgt_d_op = 0b00111000011110011, + ldle_b_op = 0b00111000011110100, + ldle_h_op = 0b00111000011110101, + ldle_w_op = 0b00111000011110110, + ldle_d_op = 0b00111000011110111, + stgt_b_op = 0b00111000011111000, + stgt_h_op = 0b00111000011111001, + stgt_w_op = 0b00111000011111010, + stgt_d_op = 0b00111000011111011, + stle_b_op = 0b00111000011111100, + stle_h_op = 0b00111000011111101, + stle_w_op = 0b00111000011111110, + stle_d_op = 0b00111000011111111, + vseq_b_op = 0b01110000000000000, + vseq_h_op = 0b01110000000000001, + vseq_w_op = 0b01110000000000010, + vseq_d_op = 0b01110000000000011, + vsle_b_op = 0b01110000000000100, + vsle_h_op = 0b01110000000000101, + vsle_w_op = 0b01110000000000110, + vsle_d_op = 0b01110000000000111, + vsle_bu_op = 0b01110000000001000, + vsle_hu_op = 0b01110000000001001, + vsle_wu_op = 0b01110000000001010, + vsle_du_op = 0b01110000000001011, + vslt_b_op = 0b01110000000001100, + vslt_h_op = 0b01110000000001101, + vslt_w_op = 0b01110000000001110, + vslt_d_op = 0b01110000000001111, + vslt_bu_op = 0b01110000000010000, + vslt_hu_op = 0b01110000000010001, + vslt_wu_op = 0b01110000000010010, + vslt_du_op = 0b01110000000010011, + vadd_b_op = 0b01110000000010100, + vadd_h_op = 0b01110000000010101, + vadd_w_op = 0b01110000000010110, + vadd_d_op = 0b01110000000010111, + vsub_b_op = 0b01110000000011000, + vsub_h_op = 0b01110000000011001, + vsub_w_op = 0b01110000000011010, + vsub_d_op = 0b01110000000011011, + vabsd_b_op = 0b01110000011000000, + vabsd_h_op = 0b01110000011000001, + vabsd_w_op = 0b01110000011000010, + vabsd_d_op = 0b01110000011000011, + vmax_b_op = 0b01110000011100000, + vmax_h_op = 0b01110000011100001, + vmax_w_op = 0b01110000011100010, + vmax_d_op = 0b01110000011100011, + vmin_b_op = 0b01110000011100100, + vmin_h_op = 0b01110000011100101, + vmin_w_op = 0b01110000011100110, + vmin_d_op = 0b01110000011100111, + vmul_b_op = 
0b01110000100001000, + vmul_h_op = 0b01110000100001001, + vmul_w_op = 0b01110000100001010, + vmul_d_op = 0b01110000100001011, + vmuh_b_op = 0b01110000100001100, + vmuh_h_op = 0b01110000100001101, + vmuh_w_op = 0b01110000100001110, + vmuh_d_op = 0b01110000100001111, + vmuh_bu_op = 0b01110000100010000, + vmuh_hu_op = 0b01110000100010001, + vmuh_wu_op = 0b01110000100010010, + vmuh_du_op = 0b01110000100010011, + vmulwev_h_b_op = 0b01110000100100000, + vmulwev_w_h_op = 0b01110000100100001, + vmulwev_d_w_op = 0b01110000100100010, + vmulwev_q_d_op = 0b01110000100100011, + vmulwod_h_b_op = 0b01110000100100100, + vmulwod_w_h_op = 0b01110000100100101, + vmulwod_d_w_op = 0b01110000100100110, + vmulwod_q_d_op = 0b01110000100100111, + vmadd_b_op = 0b01110000101010000, + vmadd_h_op = 0b01110000101010001, + vmadd_w_op = 0b01110000101010010, + vmadd_d_op = 0b01110000101010011, + vmsub_b_op = 0b01110000101010100, + vmsub_h_op = 0b01110000101010101, + vmsub_w_op = 0b01110000101010110, + vmsub_d_op = 0b01110000101010111, + vsll_b_op = 0b01110000111010000, + vsll_h_op = 0b01110000111010001, + vsll_w_op = 0b01110000111010010, + vsll_d_op = 0b01110000111010011, + vsrl_b_op = 0b01110000111010100, + vsrl_h_op = 0b01110000111010101, + vsrl_w_op = 0b01110000111010110, + vsrl_d_op = 0b01110000111010111, + vsra_b_op = 0b01110000111011000, + vsra_h_op = 0b01110000111011001, + vsra_w_op = 0b01110000111011010, + vsra_d_op = 0b01110000111011011, + vrotr_b_op = 0b01110000111011100, + vrotr_h_op = 0b01110000111011101, + vrotr_w_op = 0b01110000111011110, + vrotr_d_op = 0b01110000111011111, + vbitclr_b_op = 0b01110001000011000, + vbitclr_h_op = 0b01110001000011001, + vbitclr_w_op = 0b01110001000011010, + vbitclr_d_op = 0b01110001000011011, + vbitset_b_op = 0b01110001000011100, + vbitset_h_op = 0b01110001000011101, + vbitset_w_op = 0b01110001000011110, + vbitset_d_op = 0b01110001000011111, + vbitrev_b_op = 0b01110001000100000, + vbitrev_h_op = 0b01110001000100001, + vbitrev_w_op = 0b01110001000100010, + vbitrev_d_op = 0b01110001000100011, + vand_v_op = 0b01110001001001100, + vor_v_op = 0b01110001001001101, + vxor_v_op = 0b01110001001001110, + vnor_v_op = 0b01110001001001111, + vandn_v_op = 0b01110001001010000, + vorn_v_op = 0b01110001001010001, + vadd_q_op = 0b01110001001011010, + vsub_q_op = 0b01110001001011011, + vfadd_s_op = 0b01110001001100001, + vfadd_d_op = 0b01110001001100010, + vfsub_s_op = 0b01110001001100101, + vfsub_d_op = 0b01110001001100110, + vfmul_s_op = 0b01110001001110001, + vfmul_d_op = 0b01110001001110010, + vfdiv_s_op = 0b01110001001110101, + vfdiv_d_op = 0b01110001001110110, + vfmax_s_op = 0b01110001001111001, + vfmax_d_op = 0b01110001001111010, + vfmin_s_op = 0b01110001001111101, + vfmin_d_op = 0b01110001001111110, + vfcvt_h_s_op = 0b01110001010001100, + vfcvt_s_d_op = 0b01110001010001101, + vffint_s_l_op = 0b01110001010010000, + vftint_w_d_op = 0b01110001010010011, + vftintrm_w_d_op = 0b01110001010010100, + vftintrp_w_d_op = 0b01110001010010101, + vftintrz_w_d_op = 0b01110001010010110, + vftintrne_w_d_op = 0b01110001010010111, + vshuf_h_op = 0b01110001011110101, + vshuf_w_op = 0b01110001011110110, + vshuf_d_op = 0b01110001011110111, + vslti_bu_op = 0b01110010100010000, + vslti_hu_op = 0b01110010100010001, + vslti_wu_op = 0b01110010100010010, + vslti_du_op = 0b01110010100010011, + vaddi_bu_op = 0b01110010100010100, + vaddi_hu_op = 0b01110010100010101, + vaddi_wu_op = 0b01110010100010110, + vaddi_du_op = 0b01110010100010111, + vsubi_bu_op = 0b01110010100011000, + vsubi_hu_op = 0b01110010100011001, + 
vsubi_wu_op = 0b01110010100011010, + vsubi_du_op = 0b01110010100011011, + vrotri_w_op = 0b01110010101000001, + vbitclri_w_op = 0b01110011000100001, + vbitseti_w_op = 0b01110011000101001, + vbitrevi_w_op = 0b01110011000110001, + vslli_w_op = 0b01110011001011001, + vsrli_w_op = 0b01110011001100001, + vsrai_w_op = 0b01110011001101001, + vsrlni_h_w_op = 0b01110011010000001, + xvseq_b_op = 0b01110100000000000, + xvseq_h_op = 0b01110100000000001, + xvseq_w_op = 0b01110100000000010, + xvseq_d_op = 0b01110100000000011, + xvsle_b_op = 0b01110100000000100, + xvsle_h_op = 0b01110100000000101, + xvsle_w_op = 0b01110100000000110, + xvsle_d_op = 0b01110100000000111, + xvsle_bu_op = 0b01110100000001000, + xvsle_hu_op = 0b01110100000001001, + xvsle_wu_op = 0b01110100000001010, + xvsle_du_op = 0b01110100000001011, + xvslt_b_op = 0b01110100000001100, + xvslt_h_op = 0b01110100000001101, + xvslt_w_op = 0b01110100000001110, + xvslt_d_op = 0b01110100000001111, + xvslt_bu_op = 0b01110100000010000, + xvslt_hu_op = 0b01110100000010001, + xvslt_wu_op = 0b01110100000010010, + xvslt_du_op = 0b01110100000010011, + xvadd_b_op = 0b01110100000010100, + xvadd_h_op = 0b01110100000010101, + xvadd_w_op = 0b01110100000010110, + xvadd_d_op = 0b01110100000010111, + xvsub_b_op = 0b01110100000011000, + xvsub_h_op = 0b01110100000011001, + xvsub_w_op = 0b01110100000011010, + xvsub_d_op = 0b01110100000011011, + xvabsd_b_op = 0b01110100011000000, + xvabsd_h_op = 0b01110100011000001, + xvabsd_w_op = 0b01110100011000010, + xvabsd_d_op = 0b01110100011000011, + xvmax_b_op = 0b01110100011100000, + xvmax_h_op = 0b01110100011100001, + xvmax_w_op = 0b01110100011100010, + xvmax_d_op = 0b01110100011100011, + xvmin_b_op = 0b01110100011100100, + xvmin_h_op = 0b01110100011100101, + xvmin_w_op = 0b01110100011100110, + xvmin_d_op = 0b01110100011100111, + xvmul_b_op = 0b01110100100001000, + xvmul_h_op = 0b01110100100001001, + xvmul_w_op = 0b01110100100001010, + xvmul_d_op = 0b01110100100001011, + xvmuh_b_op = 0b01110100100001100, + xvmuh_h_op = 0b01110100100001101, + xvmuh_w_op = 0b01110100100001110, + xvmuh_d_op = 0b01110100100001111, + xvmuh_bu_op = 0b01110100100010000, + xvmuh_hu_op = 0b01110100100010001, + xvmuh_wu_op = 0b01110100100010010, + xvmuh_du_op = 0b01110100100010011, + xvmulwev_h_b_op = 0b01110100100100000, + xvmulwev_w_h_op = 0b01110100100100001, + xvmulwev_d_w_op = 0b01110100100100010, + xvmulwev_q_d_op = 0b01110100100100011, + xvmulwod_h_b_op = 0b01110100100100100, + xvmulwod_w_h_op = 0b01110100100100101, + xvmulwod_d_w_op = 0b01110100100100110, + xvmulwod_q_d_op = 0b01110100100100111, + xvmadd_b_op = 0b01110100101010000, + xvmadd_h_op = 0b01110100101010001, + xvmadd_w_op = 0b01110100101010010, + xvmadd_d_op = 0b01110100101010011, + xvmsub_b_op = 0b01110100101010100, + xvmsub_h_op = 0b01110100101010101, + xvmsub_w_op = 0b01110100101010110, + xvmsub_d_op = 0b01110100101010111, + xvsll_b_op = 0b01110100111010000, + xvsll_h_op = 0b01110100111010001, + xvsll_w_op = 0b01110100111010010, + xvsll_d_op = 0b01110100111010011, + xvsrl_b_op = 0b01110100111010100, + xvsrl_h_op = 0b01110100111010101, + xvsrl_w_op = 0b01110100111010110, + xvsrl_d_op = 0b01110100111010111, + xvsra_b_op = 0b01110100111011000, + xvsra_h_op = 0b01110100111011001, + xvsra_w_op = 0b01110100111011010, + xvsra_d_op = 0b01110100111011011, + xvrotr_b_op = 0b01110100111011100, + xvrotr_h_op = 0b01110100111011101, + xvrotr_w_op = 0b01110100111011110, + xvrotr_d_op = 0b01110100111011111, + xvbitclr_b_op = 0b01110101000011000, + xvbitclr_h_op = 0b01110101000011001, + 
xvbitclr_w_op = 0b01110101000011010, + xvbitclr_d_op = 0b01110101000011011, + xvbitset_b_op = 0b01110101000011100, + xvbitset_h_op = 0b01110101000011101, + xvbitset_w_op = 0b01110101000011110, + xvbitset_d_op = 0b01110101000011111, + xvbitrev_b_op = 0b01110101000100000, + xvbitrev_h_op = 0b01110101000100001, + xvbitrev_w_op = 0b01110101000100010, + xvbitrev_d_op = 0b01110101000100011, + xvand_v_op = 0b01110101001001100, + xvor_v_op = 0b01110101001001101, + xvxor_v_op = 0b01110101001001110, + xvnor_v_op = 0b01110101001001111, + xvandn_v_op = 0b01110101001010000, + xvorn_v_op = 0b01110101001010001, + xvadd_q_op = 0b01110101001011010, + xvsub_q_op = 0b01110101001011011, + xvfadd_s_op = 0b01110101001100001, + xvfadd_d_op = 0b01110101001100010, + xvfsub_s_op = 0b01110101001100101, + xvfsub_d_op = 0b01110101001100110, + xvfmul_s_op = 0b01110101001110001, + xvfmul_d_op = 0b01110101001110010, + xvfdiv_s_op = 0b01110101001110101, + xvfdiv_d_op = 0b01110101001110110, + xvfmax_s_op = 0b01110101001111001, + xvfmax_d_op = 0b01110101001111010, + xvfmin_s_op = 0b01110101001111101, + xvfmin_d_op = 0b01110101001111110, + xvfcvt_h_s_op = 0b01110101010001100, + xvfcvt_s_d_op = 0b01110101010001101, + xvffint_s_l_op = 0b01110101010010000, + xvftint_w_d_op = 0b01110101010010011, + xvftintrm_w_d_op = 0b01110101010010100, + xvftintrp_w_d_op = 0b01110101010010101, + xvftintrz_w_d_op = 0b01110101010010110, + xvftintrne_w_d_op = 0b01110101010010111, + xvshuf_h_op = 0b01110101011110101, + xvshuf_w_op = 0b01110101011110110, + xvshuf_d_op = 0b01110101011110111, + xvperm_w_op = 0b01110101011111010, + xvslti_bu_op = 0b01110110100010000, + xvslti_hu_op = 0b01110110100010001, + xvslti_wu_op = 0b01110110100010010, + xvslti_du_op = 0b01110110100010011, + xvaddi_bu_op = 0b01110110100010100, + xvaddi_hu_op = 0b01110110100010101, + xvaddi_wu_op = 0b01110110100010110, + xvaddi_du_op = 0b01110110100010111, + xvsubi_bu_op = 0b01110110100011000, + xvsubi_hu_op = 0b01110110100011001, + xvsubi_wu_op = 0b01110110100011010, + xvsubi_du_op = 0b01110110100011011, + xvrotri_w_op = 0b01110110101000001, + xvbitclri_w_op = 0b01110111000100001, + xvbitseti_w_op = 0b01110111000101001, + xvbitrevi_w_op = 0b01110111000110001, + xvslli_w_op = 0b01110111001011001, + xvsrli_w_op = 0b01110111001100001, + xvsrai_w_op = 0b01110111001101001, + + unknow_ops17 = 0b11111111111111111 + }; + + // 16-bit opcode, highest 16 bits: bits[31...16] + enum ops16 { + vrotri_d_op = 0b0111001010100001, + vbitclri_d_op = 0b0111001100010001, + vbitseti_d_op = 0b0111001100010101, + vbitrevi_d_op = 0b0111001100011001, + vslli_d_op = 0b0111001100101101, + vsrli_d_op = 0b0111001100110001, + vsrai_d_op = 0b0111001100110101, + vsrlni_w_d_op = 0b0111001101000001, + xvrotri_d_op = 0b0111011010100001, + xvbitclri_d_op = 0b0111011100010001, + xvbitseti_d_op = 0b0111011100010101, + xvbitrevi_d_op = 0b0111011100011001, + xvslli_d_op = 0b0111011100101101, + xvsrli_d_op = 0b0111011100110001, + xvsrai_d_op = 0b0111011100110101, + + unknow_ops16 = 0b1111111111111111 + }; + + // 15-bit opcode, highest 15 bits: bits[31...17] + enum ops15 { + vsrlni_d_q_op = 0b011100110100001, + + unknow_ops15 = 0b111111111111111 + }; + + // 14-bit opcode, highest 14 bits: bits[31...18] + enum ops14 { + alsl_w_op = 0b00000000000001, + bytepick_w_op = 0b00000000000010, + bytepick_d_op = 0b00000000000011, + alsl_d_op = 0b00000000001011, + slli_op = 0b00000000010000, + srli_op = 0b00000000010001, + srai_op = 0b00000000010010, + rotri_op = 0b00000000010011, + lddir_op = 0b00000110010000, + ldpte_op = 
0b00000110010001, + vshuf4i_b_op = 0b01110011100100, + vshuf4i_h_op = 0b01110011100101, + vshuf4i_w_op = 0b01110011100110, + vshuf4i_d_op = 0b01110011100111, + vandi_b_op = 0b01110011110100, + vori_b_op = 0b01110011110101, + vxori_b_op = 0b01110011110110, + vnori_b_op = 0b01110011110111, + vldi_op = 0b01110011111000, + vpermi_w_op = 0b01110011111001, + xvshuf4i_b_op = 0b01110111100100, + xvshuf4i_h_op = 0b01110111100101, + xvshuf4i_w_op = 0b01110111100110, + xvshuf4i_d_op = 0b01110111100111, + xvandi_b_op = 0b01110111110100, + xvori_b_op = 0b01110111110101, + xvxori_b_op = 0b01110111110110, + xvnori_b_op = 0b01110111110111, + xvldi_op = 0b01110111111000, + xvpermi_w_op = 0b01110111111001, + xvpermi_d_op = 0b01110111111010, + xvpermi_q_op = 0b01110111111011, + + unknow_ops14 = 0b11111111111111 + }; + + // 12-bit opcode, highest 12 bits: bits[31...20] + enum ops12 { + fmadd_s_op = 0b000010000001, + fmadd_d_op = 0b000010000010, + fmsub_s_op = 0b000010000101, + fmsub_d_op = 0b000010000110, + fnmadd_s_op = 0b000010001001, + fnmadd_d_op = 0b000010001010, + fnmsub_s_op = 0b000010001101, + fnmsub_d_op = 0b000010001110, + vfmadd_s_op = 0b000010010001, + vfmadd_d_op = 0b000010010010, + vfmsub_s_op = 0b000010010101, + vfmsub_d_op = 0b000010010110, + vfnmadd_s_op = 0b000010011001, + vfnmadd_d_op = 0b000010011010, + vfnmsub_s_op = 0b000010011101, + vfnmsub_d_op = 0b000010011110, + xvfmadd_s_op = 0b000010100001, + xvfmadd_d_op = 0b000010100010, + xvfmsub_s_op = 0b000010100101, + xvfmsub_d_op = 0b000010100110, + xvfnmadd_s_op = 0b000010101001, + xvfnmadd_d_op = 0b000010101010, + xvfnmsub_s_op = 0b000010101101, + xvfnmsub_d_op = 0b000010101110, + fcmp_cond_s_op = 0b000011000001, + fcmp_cond_d_op = 0b000011000010, + vfcmp_cond_s_op = 0b000011000101, + vfcmp_cond_d_op = 0b000011000110, + xvfcmp_cond_s_op = 0b000011001001, + xvfcmp_cond_d_op = 0b000011001010, + fsel_op = 0b000011010000, + vbitsel_v_op = 0b000011010001, + xvbitsel_v_op = 0b000011010010, + vshuf_b_op = 0b000011010101, + xvshuf_b_op = 0b000011010110, + + unknow_ops12 = 0b111111111111 + }; + + // 10-bit opcode, highest 10 bits: bits[31...22] + enum ops10 { + bstr_w_op = 0b0000000001, + bstrins_d_op = 0b0000000010, + bstrpick_d_op = 0b0000000011, + slti_op = 0b0000001000, + sltui_op = 0b0000001001, + addi_w_op = 0b0000001010, + addi_d_op = 0b0000001011, + lu52i_d_op = 0b0000001100, + andi_op = 0b0000001101, + ori_op = 0b0000001110, + xori_op = 0b0000001111, + ld_b_op = 0b0010100000, + ld_h_op = 0b0010100001, + ld_w_op = 0b0010100010, + ld_d_op = 0b0010100011, + st_b_op = 0b0010100100, + st_h_op = 0b0010100101, + st_w_op = 0b0010100110, + st_d_op = 0b0010100111, + ld_bu_op = 0b0010101000, + ld_hu_op = 0b0010101001, + ld_wu_op = 0b0010101010, + preld_op = 0b0010101011, + fld_s_op = 0b0010101100, + fst_s_op = 0b0010101101, + fld_d_op = 0b0010101110, + fst_d_op = 0b0010101111, + vld_op = 0b0010110000, + vst_op = 0b0010110001, + xvld_op = 0b0010110010, + xvst_op = 0b0010110011, + ldl_w_op = 0b0010111000, + ldr_w_op = 0b0010111001, + + unknow_ops10 = 0b1111111111 + }; + + // 8-bit opcode, highest 8 bits: bits[31...22] + enum ops8 { + ll_w_op = 0b00100000, + sc_w_op = 0b00100001, + ll_d_op = 0b00100010, + sc_d_op = 0b00100011, + ldptr_w_op = 0b00100100, + stptr_w_op = 0b00100101, + ldptr_d_op = 0b00100110, + stptr_d_op = 0b00100111, + + unknow_ops8 = 0b11111111 + }; + + // 7-bit opcode, highest 7 bits: bits[31...25] + enum ops7 { + lu12i_w_op = 0b0001010, + lu32i_d_op = 0b0001011, + pcaddi_op = 0b0001100, + pcalau12i_op = 0b0001101, + pcaddu12i_op = 
0b0001110, + pcaddu18i_op = 0b0001111, + + unknow_ops7 = 0b1111111 + }; + + // 6-bit opcode, highest 6 bits: bits[31...25] + enum ops6 { + addu16i_d_op = 0b000100, + beqz_op = 0b010000, + bnez_op = 0b010001, + bccondz_op = 0b010010, + jirl_op = 0b010011, + b_op = 0b010100, + bl_op = 0b010101, + beq_op = 0b010110, + bne_op = 0b010111, + blt_op = 0b011000, + bge_op = 0b011001, + bltu_op = 0b011010, + bgeu_op = 0b011011, + + unknow_ops6 = 0b111111 + }; + + enum fcmp_cond { + fcmp_caf = 0x00, + fcmp_cun = 0x08, + fcmp_ceq = 0x04, + fcmp_cueq = 0x0c, + fcmp_clt = 0x02, + fcmp_cult = 0x0a, + fcmp_cle = 0x06, + fcmp_cule = 0x0e, + fcmp_cne = 0x10, + fcmp_cor = 0x14, + fcmp_cune = 0x18, + fcmp_saf = 0x01, + fcmp_sun = 0x09, + fcmp_seq = 0x05, + fcmp_sueq = 0x0d, + fcmp_slt = 0x03, + fcmp_sult = 0x0b, + fcmp_sle = 0x07, + fcmp_sule = 0x0f, + fcmp_sne = 0x11, + fcmp_sor = 0x15, + fcmp_sune = 0x19 + }; + + enum Condition { + zero , + notZero , + equal , + notEqual , + less , + lessEqual , + greater , + greaterEqual , + below , + belowEqual , + above , + aboveEqual + }; + + static const int LogInstructionSize = 2; + static const int InstructionSize = 1 << LogInstructionSize; + + enum WhichOperand { + // input to locate_operand, and format code for relocations + imm_operand = 0, // embedded 32-bit|64-bit immediate operand + disp32_operand = 1, // embedded 32-bit displacement or address + call32_operand = 2, // embedded 32-bit self-relative displacement + narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop + _WhichOperand_limit = 4 + }; + + static int low (int x, int l) { return bitfield(x, 0, l); } + static int low16(int x) { return low(x, 16); } + static int low26(int x) { return low(x, 26); } + + static int high (int x, int l) { return bitfield(x, 32-l, l); } + static int high16(int x) { return high(x, 16); } + static int high6 (int x) { return high(x, 6); } + + + protected: + // help methods for instruction ejection + + // 2R-type + // 31 10 9 5 4 0 + // | opcode | rj | rd | + static inline int insn_RR (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; } + + // 3R-type + // 31 15 14 10 9 5 4 0 + // | opcode | rk | rj | rd | + static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; } + + // 4R-type + // 31 20 19 15 14 10 9 5 4 0 + // | opcode | ra | rk | rj | rd | + static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | rd; } + + // 2RI1-type + // 31 11 10 9 5 4 0 + // | opcode | I1 | vj | rd | + static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; } + + // 2RI2-type + // 31 12 11 10 9 5 4 0 + // | opcode | I2 | vj | rd | + static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; } + + // 2RI3-type + // 31 13 12 10 9 5 4 0 + // | opcode | I3 | vj | vd | + static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; } + + // 2RI4-type + // 31 14 13 10 9 5 4 0 + // | opcode | I4 | vj | vd | + static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; } + + // 2RI5-type + // 31 15 14 10 9 5 4 0 + // | opcode | I5 | vj | vd | + 
static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return (op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; } + + // 2RI6-type + // 31 16 15 10 9 5 4 0 + // | opcode | I6 | vj | vd | + static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; } + + // 2RI7-type + // 31 17 16 10 9 5 4 0 + // | opcode | I7 | vj | vd | + static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 7)<<10) | (vj<<5) | vd; } + + // 2RI8-type + // 31 18 17 10 9 5 4 0 + // | opcode | I8 | rj | rd | + static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; } + + // 2RI12-type + // 31 22 21 10 9 5 4 0 + // | opcode | I12 | rj | rd | + static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; } + + + // 2RI14-type + // 31 24 23 10 9 5 4 0 + // | opcode | I14 | rj | rd | + static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; } + + // 2RI16-type + // 31 26 25 10 9 5 4 0 + // | opcode | I16 | rj | rd | + static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; } + + // 1RI13-type (?) + // 31 18 17 5 4 0 + // | opcode | I13 | vd | + static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; } + + // 1RI20-type (?) + // 31 25 24 5 4 0 + // | opcode | I20 | rd | + static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; } + + // 1RI21-type + // 31 26 25 10 9 5 4 0 + // | opcode | I21[15:0] | rj |I21[20:16]| + static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); } + + // I26-type + // 31 26 25 10 9 0 + // | opcode | I26[15:0] | I26[25:16] | + static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); } + + // imm15 + // 31 15 14 0 + // | opcode | I15 | + static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); } + + + // get the offset field of beq, bne, blt[u], bge[u] instruction + int offset16(address entry) { + assert(is_simm16((entry - pc()) / 4), "change this code"); + if (!is_simm16((entry - pc()) / 4)) { + tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4); + } + return (entry - pc()) / 4; + } + + // get the offset field of beqz, bnez instruction + int offset21(address entry) { + assert(is_simm((int)(entry - pc()) / 4, 21), "change this code"); + if (!is_simm((int)(entry - pc()) / 4, 21)) { + tty->print_cr("!!!
is_simm21: %lx", (entry - pc()) / 4); + } + return (entry - pc()) / 4; + } + + // get the offset field of b instruction + int offset26(address entry) { + assert(is_simm((int)(entry - pc()) / 4, 26), "change this code"); + if (!is_simm((int)(entry - pc()) / 4, 26)) { + tty->print_cr("!!! is_simm26: %lx", (entry - pc()) / 4); + } + return (entry - pc()) / 4; + } + +public: + using AbstractAssembler::offset; + + //sign expand with the sign bit is h + static int expand(int x, int h) { return -(x & (1<> 16; + } + + static int split_high16(int x) { + return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; + } + + static int split_low20(int x) { + return (x & 0xfffff); + } + + // Convert 20-bit x to a sign-extended 20-bit integer + static int simm20(int x) { + assert(x == (x & 0xFFFFF), "must be 20-bit only"); + return (x << 12) >> 12; + } + + static int split_low12(int x) { + return (x & 0xfff); + } + + static inline void split_simm38(jlong si38, jint& si18, jint& si20) { + si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14; + si38 += (si38 & 0x20000) << 1; + si20 = si38 >> 18; + } + + // Convert 12-bit x to a sign-extended 12-bit integer + static int simm12(int x) { + assert(x == (x & 0xFFF), "must be 12-bit only"); + return (x << 20) >> 20; + } + + // Convert 26-bit x to a sign-extended 26-bit integer + static int simm26(int x) { + assert(x == (x & 0x3FFFFFF), "must be 26-bit only"); + return (x << 6) >> 6; + } + + static intptr_t merge(intptr_t x0, intptr_t x12) { + //lu12i, ori + return (((x12 << 12) | x0) << 32) >> 32; + } + + static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) { + //lu32i, lu12i, ori + return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12; + } + + static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) { + //lu52i, lu32i, lu12i, ori + return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0; + } + + // Test if x is within signed immediate range for nbits. + static bool is_simm (int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int min = -( ((int)1) << nbits-1 ); + const int maxplus1 = ( ((int)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + static bool is_simm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong min = -( ((jlong)1) << nbits-1 ); + const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + static bool is_simm16(int x) { return is_simm(x, 16); } + static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } + + // Test if x is within unsigned immediate range for nbits + static bool is_uimm(int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int maxplus1 = ( ((int)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + static bool is_uimm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong maxplus1 = ( ((jlong)1) << nbits ); + return 0 <= x && x < maxplus1; + } + +public: + + void flush() { + AbstractAssembler::flush(); + } + + inline void emit_data(int x) { emit_int32(x); } + inline void emit_data(int x, relocInfo::relocType rtype) { + relocate(rtype); + emit_int32(x); + } + + inline void emit_data(int x, RelocationHolder const& rspec) { + relocate(rspec); + emit_int32(x); + } + + // Generic instructions + // Does 32bit or 64bit as needed for the platform. 
In some sense these + // belong in macro assembler but there is no need for both varieties to exist + + void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } + void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } + void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } + void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } + void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } + void bitrev_8b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } + void bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } + void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } + + void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } + + void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } + void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } + + void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + void alsl_wu(Register rd, Register rj, Register rk, int 
sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + + void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + 
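+
+  // Worked example (illustrative sketch; the register numbers below are assumed, not taken from this patch):
+  // every 3R-type emitter in this group packs its operands through insn_RRR, i.e.
+  //   word = (op << 15) | (rk << 10) | (rj << 5) | rd
+  // so add_d with rd = 4, rj = 5, rk = 6 fills the low 15 bits with
+  //   (6 << 10) | (5 << 5) | 4  ==  0b00110'00101'00100  ==  6308
+  // while the opcode value occupies bits [31..15].
+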
void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), 
(int)rd->encoding())); } + void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); } + + void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + + void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + void srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + + void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } + void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } + void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } + void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { 
assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } + + void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmin_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, 
(int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void frsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); } + + void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); } + void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); } + void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); } + void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } + void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); } + void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } + void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); } + void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); } + void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); } + void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, 
(int)cj->encoding(), (int)fd->encoding())); } + void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); } + void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); } + + void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); } + + void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_s_l(FloatRegister fd, FloatRegister fj) { 
emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); } + void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); } + + void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); } + void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } + void ori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } + void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } + + void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , 
(int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sult_s (ConditionalFlagRegister 
cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + + void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), 
(int)cd->encoding())); } + void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + + void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); } + + void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); } + void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); } + void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); } + void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); } + void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); } + void pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); } + + void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit 
int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + + void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_wu (Register rd, Register rj, int si12) { 
assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); } + void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + + void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } + void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } + void fstx_s (FloatRegister fd, 
Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } + void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } + + void ld_b (Register rd, Address src); + void ld_bu (Register rd, Address src); + void ld_d (Register rd, Address src); + void ld_h (Register rd, Address src); + void ld_hu (Register rd, Address src); + void ll_w (Register rd, Address src); + void ll_d (Register rd, Address src); + void ld_wu (Register rd, Address src); + void ld_w (Register rd, Address src); + void st_b (Register rd, Address dst); + void st_d (Register rd, Address dst); + void st_w (Register rd, Address dst); + void sc_w (Register rd, Address dst); + void sc_d (Register rd, Address dst); + void st_h (Register rd, Address dst); + void fld_s (FloatRegister fd, Address src); + void fld_d (FloatRegister fd, Address src); + void fst_s (FloatRegister fd, Address dst); + void fst_d (FloatRegister fd, Address dst); + + void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amadd_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amadd_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amand_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amand_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amxor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amxor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(),
(int)rj->encoding(), (int)rd->encoding())); } + void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amxor_db_w (Register rd, 
Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void dbar(int hint) { + assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); + + if (os::is_ActiveCoresMP()) + andi(R0, R0, 0); + else + emit_int32(insn_I15(dbar_op, hint)); + } + void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); } + + void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } 
+ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, (int)rj->encoding())); } + void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); } + void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); } + void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); } + + void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), 
"not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); } + + void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); } + void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); } + + + void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + + void beq (Register rj, Register rd, address entry) { beq (rj, rd, offset16(entry)); } + void bne (Register rj, Register rd, address entry) { bne (rj, rd, offset16(entry)); } + void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); } + void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); } + void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); } + void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); } + void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); } + void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); } + void b(address entry) { b(offset26(entry)); } + void bl(address entry) { bl(offset26(entry)); } + void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); } + void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); } + + void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); } + void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); } + void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); } + void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); } + void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); } + void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, target(L)); } + void beqz (Register rj, Label& L) { beqz (rj, target(L)); } + void bnez (Register rj, Label& L) { bnez (rj, target(L)); } + void b(Label& L) { b(target(L)); } + void bl(Label& L) { bl(target(L)); } + void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } + void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } + + typedef enum { + // hint[4] + Completion = 0, + Ordering = (1 << 4), + + // The bitwise-not of the below constants is corresponding to the hint. This is convenient for OR operation. 
+ // hint[3:2] and hint[1:0] + LoadLoad = ((1 << 3) | (1 << 1)), + LoadStore = ((1 << 3) | (1 << 0)), + StoreLoad = ((1 << 2) | (1 << 1)), + StoreStore = ((1 << 2) | (1 << 0)), + AnyAny = ((3 << 2) | (3 << 0)), + } Membar_mask_bits; + + // Serializes memory and blows flags + void membar(Membar_mask_bits hint) { + assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); + assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); + dbar(Ordering | (~hint & 0xf)); + } + + // LSX and LASX +#define ASSERT_LSX assert(UseLSX, ""); +#define ASSERT_LASX assert(UseLASX, ""); + + void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { 
ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + + void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + + void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } + void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, (int)vj->encoding(), (int)vd->encoding())); } + void vneg_w(FloatRegister vd, 
FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } + void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + 
void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } + void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } + void xvldi(FloatRegister xd, int i13) { ASSERT_LASX emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } + + void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), 
(int)vj->encoding(), (int)vd->encoding())); } + void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void 
xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX 
emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + 
void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void 
xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } + + void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } + void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } + void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } + void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } + void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void 
vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX 
emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, 
FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } + void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvfcvtl_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } + void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } + void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), 
(int)xd->encoding())); } + + void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) 
{ ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrml_l_s(FloatRegister xd, 
FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvseq_h(FloatRegister 
xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, 
ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + + void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void 
vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + + void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) 
{ ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, 
fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } + void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } + void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } + void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } + + void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } + void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } + + void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } + + void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { 
ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } + + void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } + void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } + + void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } + void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } + + void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } + void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } + void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } + void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } + void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } + void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } + + void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } + void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } + + void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } + + void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void 
xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } + + void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + + void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf4i_d(FloatRegister
vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} + void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} + + void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} + void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} + + void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } + void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } + + void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } + void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } + +#undef ASSERT_LSX +#undef ASSERT_LASX + +public: + // Creation + Assembler(CodeBuffer* code) : AbstractAssembler(code) {} + + // Decoding + static address locate_operand(address inst, WhichOperand which); + static address locate_next_instruction(address inst); +}; + +#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp new file mode 100644 index 00000000000..601f4afe6fe --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp new file mode 100644 index 00000000000..32775e9bc39 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP + +// Platform specific for C++ based Interpreter +#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */ + +private: + + // save the bottom of the stack after frame manager setup. For ease of restoration after return + // from recursive interpreter call + intptr_t* _frame_bottom; /* saved bottom of frame manager frame */ + intptr_t* _last_Java_pc; /* pc to return to in frame manager */ + intptr_t* _sender_sp; /* sender's sp before stack (locals) extension */ + interpreterState _self_link; /* Previous interpreter state */ /* sometimes points to self??? 
*/ + double _native_fresult; /* save result of native calls that might return floats */ + intptr_t _native_lresult; /* save result of native calls that might return handle/longs */ +public: + + static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp); + inline intptr_t* sender_sp() { + return _sender_sp; + } + + +#define SET_LAST_JAVA_FRAME() + +#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set_flags(0); + +/* + * Macros for accessing the stack. + */ +#undef STACK_INT +#undef STACK_FLOAT +#undef STACK_ADDR +#undef STACK_OBJECT +#undef STACK_DOUBLE +#undef STACK_LONG + +// JavaStack Implementation + +#define GET_STACK_SLOT(offset) (*((intptr_t*) &topOfStack[-(offset)])) +#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)]) +#define STACK_ADDR(offset) (*((address *) &topOfStack[-(offset)])) +#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)])) +#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)])) +#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)])) +#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d) +#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l) + +#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value)) +#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value)) +#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value)) +#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \ + ((VMJavaVal64*)(addr))->l) +// JavaLocals implementation + +#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)]) +#define LOCALS_ADDR(offset) ((address)locals[-(offset)]) +#define LOCALS_INT(offset) (*((jint*)&locals[-(offset)])) +#define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)])) +#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)]) +#define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d) +#define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l) +#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)])) +#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)])) + +#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value)) +#define SET_LOCALS_ADDR(value, offset) (*((address *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_FLOAT(value, offset) (*((jfloat *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_OBJECT(value, offset) (*((oop *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value)) +#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value)) +#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) 
(((VMJavaVal64*)&locals[-((offset)+1)])->l = \ + ((VMJavaVal64*)(addr))->l) + +#endif // CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp new file mode 100644 index 00000000000..07df527e94e --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP + +// Inline interpreter functions for LoongArch + +inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } +inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } +inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } +inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } +inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); } + +inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } + +inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? direction : 0); + +} + +inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { + // Copy 32 bits at a time; do not rely on a 64-bit unaligned access + to[0] = from[0]; to[1] = from[1]; +} + +// The long operations depend on compiler support for "long long" + +inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { + return op1 + op2; +} + +inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { + return op1 & op2; +} + +inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { + // QQQ what about check and throw...
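+  // Note: the shared interpreter loop is expected to have rejected a zero divisor (throwing ArithmeticException) before this helper is called, so no check is needed here.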
+ return op1 / op2; +} + +inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { + return op1 * op2; +} + +inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { + return op1 | op2; +} + +inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { + return op1 - op2; +} + +inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { + return op1 ^ op2; +} + +inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { + return op1 % op2; +} + +inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { + // CVM did this 0x3f mask, is the really needed??? QQQ + return ((unsigned long long) op1) >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { + return op1 >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { + return op1 << (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { + return -op; +} + +inline jlong BytecodeInterpreter::VMlongNot(jlong op) { + return ~op; +} + +inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { + return (op <= 0); +} + +inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { + return (op >= 0); +} + +inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { + return (op == 0); +} + +inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { + return (op1 == op2); +} + +inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { + return (op1 != op2); +} + +inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { + return (op1 >= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { + return (op1 <= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { + return (op1 < op2); +} + +inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { + return (op1 > op2); +} + +inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { + return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); +} + +// Long conversions + +inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { + return (jfloat) val; +} + +inline jint BytecodeInterpreter::VMlong2Int(jlong val) { + return (jint) val; +} + +// Double Arithmetic + +inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { + return op1 + op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { + // Divide by zero... QQQ + return op1 / op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { + return op1 * op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { + return -op; +} + +inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { + return fmod(op1, op2); +} + +inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { + return op1 - op2; +} + +inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? 
direction : 0); +} + +// Double Conversions + +inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { + return (jfloat) val; +} + +// Float Conversions + +inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { + return (jdouble) op; +} + +// Integer Arithmetic + +inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { + return op1 + op2; +} + +inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { + return op1 & op2; +} + +inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { + // it's possible we could catch this special case implicitly + if ((juint)op1 == 0x80000000 && op2 == -1) return op1; + else return op1 / op2; +} + +inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { + return op1 * op2; +} + +inline jint BytecodeInterpreter::VMintNeg(jint op) { + return -op; +} + +inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { + return op1 | op2; +} + +inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { + // it's possible we could catch this special case implicitly + if ((juint)op1 == 0x80000000 && op2 == -1) return 0; + else return op1 % op2; +} + +inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { + return op1 << op2; +} + +inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { + return op1 >> (op2 & 0x1f); // QQ op2 & 0x1f?? +} + +inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { + return op1 - op2; +} + +inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { + return ((juint) op1) >> (op2 & 0x1f); // QQ op2 & 0x1f?? +} + +inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { + return op1 ^ op2; +} + +inline jdouble BytecodeInterpreter::VMint2Double(jint val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMint2Float(jint val) { + return (jfloat) val; +} + +inline jlong BytecodeInterpreter::VMint2Long(jint val) { + return (jlong) val; +} + +inline jchar BytecodeInterpreter::VMint2Char(jint val) { + return (jchar) val; +} + +inline jshort BytecodeInterpreter::VMint2Short(jint val) { + return (jshort) val; +} + +inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { + return (jbyte) val; +} + +#endif // CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp new file mode 100644 index 00000000000..8641090584a --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/bytecodes.hpp" + + +void Bytecodes::pd_initialize() { + // No LoongArch specific initialization +} + + +Bytecodes::Code Bytecodes::pd_base_code_for(Code code) { + // No LoongArch specific bytecodes + return code; +} diff --git a/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp new file mode 100644 index 00000000000..fbdf5319960 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP + +// No Loongson specific bytecodes + +#endif // CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp new file mode 100644 index 00000000000..8f766a617e6 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP + +#include "memory/allocation.hpp" + +class Bytes: AllStatic { + public: + // Returns true if the byte ordering used by Java is different from the native byte ordering + // of the underlying machine. For example, this is true for Intel x86, but false for Solaris + // on Sparc. + // we use LoongArch, so return true + static inline bool is_Java_byte_ordering_different(){ return true; } + + + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering + // (no special code is needed since LoongArch CPUs can access unaligned data) + static inline u2 get_native_u2(address p) { return *(u2*)p; } + static inline u4 get_native_u4(address p) { return *(u4*)p; } + static inline u8 get_native_u8(address p) { return *(u8*)p; } + + static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; } + static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; } + static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; } + + + // Efficient reading and writing of unaligned unsigned data in Java + // byte ordering (i.e. big-endian ordering). Byte-order reversal is + // needed since LoongArch64 CPUs use little-endian format. + static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } + + + // Efficient swapping of byte ordering + static inline u2 swap_u2(u2 x); // compiler-dependent implementation + static inline u4 swap_u4(u4 x); // compiler-dependent implementation + static inline u8 swap_u8(u8 x); +}; + + +// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "bytes_linux_loongarch.inline.hpp" +#endif + +#endif // CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp new file mode 100644 index 00000000000..5166acfa2b5 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp @@ -0,0 +1,387 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/javaClasses.hpp" +#include "nativeInst_loongarch.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_loongarch.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#endif + +#define A0 RA0 +#define A3 RA3 + +#define __ ce->masm()-> + +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_method->as_register(), 1); + ce->store_parameter(_bci, 0); + __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, + bool throw_index_out_of_bounds_exception) + : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception) + , _index(index) +{ + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ call(a, relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); + return; + } + + if (_index->is_cpu_register()) { + __ move(SCR1, _index->as_register()); + } else { + __ li(SCR1, _index->as_jint()); + } + Runtime1::StubID stub_id; + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { + stub_id = Runtime1::throw_range_check_failed_id; + } + __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + +void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ call(a, relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } + __ bind(_entry); + __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); +#ifdef ASSERT + __ should_not_reach_here(); +#endif +} + +// Implementation of NewInstanceStub + +NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, + CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); + assert(stub_id == Runtime1::new_instance_id || + stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + +void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ 
rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ move(A3, _klass_reg->as_register()); + __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == A0, "result must be in A0"); + __ b(_continuation); +} + +// Implementation of NewTypeArrayStub + +NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + +void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == S0, "length must be in S0"); + assert(_klass_reg->as_register() == A3, "klass_reg must be in A3"); + __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == A0, "result must be in A0"); + __ b(_continuation); +} + +// Implementation of NewObjectArrayStub + +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + +void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == S0, "length must be in S0"); + assert(_klass_reg->as_register() == A3, "klass_reg must be in A3"); + __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == A0, "result must be in A0"); + __ b(_continuation); +} + +// Implementation of MonitorAccessStubs + +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) + : MonitorAccessStub(obj_reg, lock_reg) { + _info = new CodeEmitInfo(info); +} + +void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); + ce->store_parameter(_lock_reg->as_register(), 0); + Runtime1::StubID enter_id; + if (ce->compilation()->has_fpu_code()) { + enter_id = Runtime1::monitorenter_id; + } else { + enter_id = Runtime1::monitorenter_nofpu_id; + } + __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it + ce->monitor_address(_monitor_ix, _lock_reg); + } + ce->store_parameter(_lock_reg->as_register(), 0); + // note: non-blocking leaf routine => no call info needed + Runtime1::StubID exit_id; + if (ce->compilation()->has_fpu_code()) { + exit_id = Runtime1::monitorexit_id; + } else { + exit_id = Runtime1::monitorexit_nofpu_id; + } + __ lipc(RA, _continuation); + __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); +} + +// Implementation of patching: +// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) +// - Replace original code with a call to the stub +// At Runtime: +// - call to stub, jump to runtime +// - in runtime: preserve all registers (especially
objects, i.e., source and destination object) +// - in runtime: after initializing class, restore original code, reexecute instruction + +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) { +} + +void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "LoongArch64 should not use C1 runtime patching"); +} + +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + DEBUG_ONLY(__ should_not_reach_here()); +} + +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. + a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + } else { + a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); + } + + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + __ bind(_entry); + __ call(a, relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); + // pass the object in a scratch register because all other registers + // must be preserved + if (_obj->is_cpu_register()) { + __ move(SCR1, _obj->as_register()); + } + __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + //---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. + // + VMRegPair args[5]; + BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; + SharedRuntime::java_calling_convention(signature, args, 5, true); + + // push parameters + // (src, src_pos, dest, destPos, length) + Register r[5]; + r[0] = src()->as_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int i = 0; i < 5 ; i++ ) { + VMReg r_1 = args[i].first(); + if (r_1->is_stack()) { + int st_off = r_1->reg2stack() * wordSize; + __ stptr_d (r[i], SP, st_off); + } else { + assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); + } + } + + ce->align_call(lir_static_call); + + ce->emit_static_call_stub(); + if (ce->compilation()->bailed_out()) { + return; // CodeCache is full + } + AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), + relocInfo::static_call_type); + address call = __ trampoline_call(resolve); + if (call == NULL) { + ce->bailout("trampoline stub overflow"); + return; + } + ce->add_call_info_here(info()); + +#ifndef PRODUCT + __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); + __ increment(Address(SCR2)); +#endif + + __ b(_continuation); +} + +///////////////////////////////////////////////////////////////////////////// +#if INCLUDE_ALL_GCS + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + // At this point we know that marking is in progress. 
+ // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. + + __ bind(_entry); + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + if (do_load()) { + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/); + } + __ beqz(pre_val_reg, _continuation); + ce->store_parameter(pre_val()->as_register(), 0); + __ call(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id), relocInfo::runtime_call_type); + __ b(_continuation); +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register new_val_reg = new_val()->as_register(); + __ beqz(new_val_reg, _continuation); + ce->store_parameter(addr()->as_pointer_register(), 0); + __ call(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id), relocInfo::runtime_call_type); + __ b(_continuation); +} + +#endif // INCLUDE_ALL_GCS +///////////////////////////////////////////////////////////////////////////// + +#undef __ diff --git a/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp new file mode 100644 index 00000000000..1140e44431d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP + +// native word offsets from memory address (little endian) +enum { + pd_lo_word_offset_in_bytes = 0, + pd_hi_word_offset_in_bytes = BytesPerWord +}; + +// explicit rounding operations are required to implement the strictFP mode +enum { + pd_strict_fp_requires_explicit_rounding = false +}; + +// FIXME: There are no callee-saved + +// registers +enum { + pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission + pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission + + pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls + pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls + + pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, + pd_last_callee_saved_reg = 21, + + pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, + + pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator + pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan + pd_nof_xmm_regs_linearscan = 0, // don't have vector registers + pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, + pd_first_byte_reg = 0, + pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, + pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, + pd_last_fpu_reg = pd_first_fpu_reg + 31, + + pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, + pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, +}; + +// Encoding of float value in debug info. This is true on x86 where +// floats are extended to doubles when stored in the stack, false for +// LoongArch64 where floats and doubles are stored in their native form. +enum { + pd_float_saved_as_double = false +}; + +#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp new file mode 100644 index 00000000000..bd8578c72a8 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP + +// No FPU stack on LoongArch +class FpuStackSim; + +#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp new file mode 100644 index 00000000000..1a89c437a83 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +//-------------------------------------------------------- +// FpuStackSim +//-------------------------------------------------------- + +// No FPU stack on LoongArch64 +#include "precompiled.hpp" diff --git a/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp new file mode 100644 index 00000000000..4f0cf053617 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP + +// On LoongArch64 the frame looks as follows: +// +// +-----------------------------+---------+----------------------------------------+----------------+----------- +// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . +// +-----------------------------+---------+----------------------------------------+----------------+----------- + + public: + static const int pd_c_runtime_reserved_arg_size; + + enum { + first_available_sp_in_frame = 0, + frame_pad_in_bytes = 16, + nof_reg_args = 8 + }; + + public: + static LIR_Opr receiver_opr; + + static LIR_Opr r0_opr; + static LIR_Opr ra_opr; + static LIR_Opr tp_opr; + static LIR_Opr sp_opr; + static LIR_Opr a0_opr; + static LIR_Opr a1_opr; + static LIR_Opr a2_opr; + static LIR_Opr a3_opr; + static LIR_Opr a4_opr; + static LIR_Opr a5_opr; + static LIR_Opr a6_opr; + static LIR_Opr a7_opr; + static LIR_Opr t0_opr; + static LIR_Opr t1_opr; + static LIR_Opr t2_opr; + static LIR_Opr t3_opr; + static LIR_Opr t4_opr; + static LIR_Opr t5_opr; + static LIR_Opr t6_opr; + static LIR_Opr t7_opr; + static LIR_Opr t8_opr; + static LIR_Opr rx_opr; + static LIR_Opr fp_opr; + static LIR_Opr s0_opr; + static LIR_Opr s1_opr; + static LIR_Opr s2_opr; + static LIR_Opr s3_opr; + static LIR_Opr s4_opr; + static LIR_Opr s5_opr; + static LIR_Opr s6_opr; + static LIR_Opr s7_opr; + static LIR_Opr s8_opr; + + static LIR_Opr ra_oop_opr; + static LIR_Opr a0_oop_opr; + static LIR_Opr a1_oop_opr; + static LIR_Opr a2_oop_opr; + static LIR_Opr a3_oop_opr; + static LIR_Opr a4_oop_opr; + static LIR_Opr a5_oop_opr; + static LIR_Opr a6_oop_opr; + static LIR_Opr a7_oop_opr; + static LIR_Opr t0_oop_opr; + static LIR_Opr t1_oop_opr; + static LIR_Opr t2_oop_opr; + static LIR_Opr t3_oop_opr; + static LIR_Opr t4_oop_opr; + static LIR_Opr t5_oop_opr; + static LIR_Opr t6_oop_opr; + static LIR_Opr t7_oop_opr; + static LIR_Opr t8_oop_opr; + static LIR_Opr fp_oop_opr; + static LIR_Opr s0_oop_opr; + static LIR_Opr s1_oop_opr; + static LIR_Opr s2_oop_opr; + static LIR_Opr s3_oop_opr; + static LIR_Opr s4_oop_opr; + static LIR_Opr s5_oop_opr; + static LIR_Opr s6_oop_opr; + static LIR_Opr s7_oop_opr; + static LIR_Opr s8_oop_opr; + + static LIR_Opr scr1_opr; + static LIR_Opr scr2_opr; + static LIR_Opr scr1_long_opr; + static LIR_Opr scr2_long_opr; + + static LIR_Opr a0_metadata_opr; + static LIR_Opr a1_metadata_opr; + static LIR_Opr a2_metadata_opr; + static LIR_Opr a3_metadata_opr; + static LIR_Opr a4_metadata_opr; + static LIR_Opr a5_metadata_opr; + + static LIR_Opr long0_opr; + static LIR_Opr long1_opr; + static LIR_Opr fpu0_float_opr; + static LIR_Opr fpu0_double_opr; + + static LIR_Opr as_long_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + static LIR_Opr as_pointer_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + + // VMReg name for spilled physical FPU stack slot n + static VMReg fpu_regname (int n); + + static bool is_caller_save_register(LIR_Opr opr) { return true; } + static bool is_caller_save_register(Register r) { return true; } + + static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } + static int last_cpu_reg() { return pd_last_cpu_reg; } + static int last_byte_reg() { return pd_last_byte_reg; } + +#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP diff --git 
a/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp new file mode 100644 index 00000000000..25c90bcf98f --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp @@ -0,0 +1,362 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_loongarch.inline.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); + if (r_1->is_stack()) { + // Convert stack slot to an SP offset + // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value + // so we must add it in here. 
+ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); + } else if (r_1->is_Register()) { + Register reg = r_1->as_Register(); + if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { + Register reg2 = r_2->as_Register(); + assert(reg2 == reg, "must be same register"); + opr = as_long_opr(reg); + } else if (is_reference_type(type)) { + opr = as_oop_opr(reg); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg); + } else if (type == T_ADDRESS) { + opr = as_address_opr(reg); + } else { + opr = as_opr(reg); + } + } else if (r_1->is_FloatRegister()) { + assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); + int num = r_1->as_FloatRegister()->encoding(); + if (type == T_FLOAT) { + opr = LIR_OprFact::single_fpu(num); + } else { + opr = LIR_OprFact::double_fpu(num); + } + } else { + ShouldNotReachHere(); + } + return opr; +} + +LIR_Opr FrameMap::r0_opr; +LIR_Opr FrameMap::ra_opr; +LIR_Opr FrameMap::tp_opr; +LIR_Opr FrameMap::sp_opr; +LIR_Opr FrameMap::a0_opr; +LIR_Opr FrameMap::a1_opr; +LIR_Opr FrameMap::a2_opr; +LIR_Opr FrameMap::a3_opr; +LIR_Opr FrameMap::a4_opr; +LIR_Opr FrameMap::a5_opr; +LIR_Opr FrameMap::a6_opr; +LIR_Opr FrameMap::a7_opr; +LIR_Opr FrameMap::t0_opr; +LIR_Opr FrameMap::t1_opr; +LIR_Opr FrameMap::t2_opr; +LIR_Opr FrameMap::t3_opr; +LIR_Opr FrameMap::t4_opr; +LIR_Opr FrameMap::t5_opr; +LIR_Opr FrameMap::t6_opr; +LIR_Opr FrameMap::t7_opr; +LIR_Opr FrameMap::t8_opr; +LIR_Opr FrameMap::rx_opr; +LIR_Opr FrameMap::fp_opr; +LIR_Opr FrameMap::s0_opr; +LIR_Opr FrameMap::s1_opr; +LIR_Opr FrameMap::s2_opr; +LIR_Opr FrameMap::s3_opr; +LIR_Opr FrameMap::s4_opr; +LIR_Opr FrameMap::s5_opr; +LIR_Opr FrameMap::s6_opr; +LIR_Opr FrameMap::s7_opr; +LIR_Opr FrameMap::s8_opr; + +LIR_Opr FrameMap::receiver_opr; + +LIR_Opr FrameMap::ra_oop_opr; +LIR_Opr FrameMap::a0_oop_opr; +LIR_Opr FrameMap::a1_oop_opr; +LIR_Opr FrameMap::a2_oop_opr; +LIR_Opr FrameMap::a3_oop_opr; +LIR_Opr FrameMap::a4_oop_opr; +LIR_Opr FrameMap::a5_oop_opr; +LIR_Opr FrameMap::a6_oop_opr; +LIR_Opr FrameMap::a7_oop_opr; +LIR_Opr FrameMap::t0_oop_opr; +LIR_Opr FrameMap::t1_oop_opr; +LIR_Opr FrameMap::t2_oop_opr; +LIR_Opr FrameMap::t3_oop_opr; +LIR_Opr FrameMap::t4_oop_opr; +LIR_Opr FrameMap::t5_oop_opr; +LIR_Opr FrameMap::t6_oop_opr; +LIR_Opr FrameMap::t7_oop_opr; +LIR_Opr FrameMap::t8_oop_opr; +LIR_Opr FrameMap::fp_oop_opr; +LIR_Opr FrameMap::s0_oop_opr; +LIR_Opr FrameMap::s1_oop_opr; +LIR_Opr FrameMap::s2_oop_opr; +LIR_Opr FrameMap::s3_oop_opr; +LIR_Opr FrameMap::s4_oop_opr; +LIR_Opr FrameMap::s5_oop_opr; +LIR_Opr FrameMap::s6_oop_opr; +LIR_Opr FrameMap::s7_oop_opr; +LIR_Opr FrameMap::s8_oop_opr; + +LIR_Opr FrameMap::scr1_opr; +LIR_Opr FrameMap::scr2_opr; +LIR_Opr FrameMap::scr1_long_opr; +LIR_Opr FrameMap::scr2_long_opr; + +LIR_Opr FrameMap::a0_metadata_opr; +LIR_Opr FrameMap::a1_metadata_opr; +LIR_Opr FrameMap::a2_metadata_opr; +LIR_Opr FrameMap::a3_metadata_opr; +LIR_Opr FrameMap::a4_metadata_opr; +LIR_Opr FrameMap::a5_metadata_opr; + +LIR_Opr FrameMap::long0_opr; +LIR_Opr FrameMap::long1_opr; +LIR_Opr FrameMap::fpu0_float_opr; +LIR_Opr FrameMap::fpu0_double_opr; + +LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; + +//-------------------------------------------------------- +// FrameMap +//-------------------------------------------------------- + +void FrameMap::initialize() { + assert(!_init_done, "once"); + int i = 0; + + // 
caller save register + map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; + + // callee save register + map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; + + // special register + map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase + map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread + map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp + map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp + map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra + map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp + + // tmp register + map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 + map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 + + scr1_opr = t7_opr; + scr2_opr = t4_opr; + scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); + scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); + + long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); + long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); + + fpu0_float_opr = LIR_OprFact::single_fpu(0); + fpu0_double_opr = LIR_OprFact::double_fpu(0); + + // scr1, scr2 not included + _caller_save_cpu_regs[0] = a0_opr; + _caller_save_cpu_regs[1] = a1_opr; + _caller_save_cpu_regs[2] = a2_opr; + _caller_save_cpu_regs[3] = a3_opr; + _caller_save_cpu_regs[4] = a4_opr; + _caller_save_cpu_regs[5] = a5_opr; + _caller_save_cpu_regs[6] = a6_opr; + _caller_save_cpu_regs[7] = a7_opr; + _caller_save_cpu_regs[8] = t0_opr; + _caller_save_cpu_regs[9] = t1_opr; + _caller_save_cpu_regs[10] = t2_opr; + _caller_save_cpu_regs[11] = t3_opr; + _caller_save_cpu_regs[12] = t5_opr; + _caller_save_cpu_regs[13] = t6_opr; + _caller_save_cpu_regs[14] = t8_opr; + + for (int i = 0; i < 8; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } + + _init_done = true; + + ra_oop_opr = as_oop_opr(RA); + a0_oop_opr = as_oop_opr(A0); + a1_oop_opr = as_oop_opr(A1); + a2_oop_opr = as_oop_opr(A2); + a3_oop_opr = as_oop_opr(A3); + a4_oop_opr = as_oop_opr(A4); + a5_oop_opr = as_oop_opr(A5); + a6_oop_opr = as_oop_opr(A6); + a7_oop_opr = 
as_oop_opr(A7); + t0_oop_opr = as_oop_opr(T0); + t1_oop_opr = as_oop_opr(T1); + t2_oop_opr = as_oop_opr(T2); + t3_oop_opr = as_oop_opr(T3); + t4_oop_opr = as_oop_opr(T4); + t5_oop_opr = as_oop_opr(T5); + t6_oop_opr = as_oop_opr(T6); + t7_oop_opr = as_oop_opr(T7); + t8_oop_opr = as_oop_opr(T8); + fp_oop_opr = as_oop_opr(FP); + s0_oop_opr = as_oop_opr(S0); + s1_oop_opr = as_oop_opr(S1); + s2_oop_opr = as_oop_opr(S2); + s3_oop_opr = as_oop_opr(S3); + s4_oop_opr = as_oop_opr(S4); + s5_oop_opr = as_oop_opr(S5); + s6_oop_opr = as_oop_opr(S6); + s7_oop_opr = as_oop_opr(S7); + s8_oop_opr = as_oop_opr(S8); + + a0_metadata_opr = as_metadata_opr(A0); + a1_metadata_opr = as_metadata_opr(A1); + a2_metadata_opr = as_metadata_opr(A2); + a3_metadata_opr = as_metadata_opr(A3); + a4_metadata_opr = as_metadata_opr(A4); + a5_metadata_opr = as_metadata_opr(A5); + + sp_opr = as_pointer_opr(SP); + fp_opr = as_pointer_opr(FP); + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; + SharedRuntime::java_calling_convention(&sig_bt, &regs, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (int i = 0; i < nof_caller_save_fpu_regs; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } +} + +Address FrameMap::make_new_address(ByteSize sp_offset) const { + // for sp, based address use this: + // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); + return Address(SP, in_bytes(sp_offset)); +} + +// ----------------mapping----------------------- +// all mapping is based on fp addressing, except for simple leaf methods where we access +// the locals sp based (and no frame is built) + +// Frame for simple leaf methods (quick entries) +// +// +----------+ +// | ret addr | <- TOS +// +----------+ +// | args | +// | ...... | + +// Frame for standard methods +// +// | .........| <- TOS +// | locals | +// +----------+ +// | old fp, | <- RFP +// +----------+ +// | ret addr | +// +----------+ +// | args | +// | .........| + +// For OopMaps, map a local variable or spill index to a VMRegImpl name. +// This is the offset from sp() in the frame of the slot for the index, +// skewed by VMRegImpl::stack0 to indicate a stack location (vs. a register). +// +// framesize + +// stack0 stack0 0 <- VMReg +// | | | +// ...........|..............|.............| +// 0 1 2 3 x x 4 5 6 ... | <- local indices +// ^ ^ sp() ( x x indicate link +// | | and return addr) +// arguments non-argument locals + +VMReg FrameMap::fpu_regname(int n) { + // Return the OptoReg name for the fpu stack slot "n" + // A spilled fpu stack slot comprises two single-word OptoReg's. + return as_FloatRegister(n)->as_VMReg(); +} + +LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + +// JSR 292 +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { + return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 +} + +bool FrameMap::validate_frame() { + return true; +} diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp new file mode 100644 index 00000000000..38b0daa0257 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP + +// ArrayCopyStub needs access to bailout +friend class ArrayCopyStub; + + private: + int array_element_size(BasicType type) const; + + void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, + int dest_index, bool pop_fpu_stack); + + // helper functions which check for overflow and set bailout if it + // occurs. They always return a valid embeddable pointer, but in the + // bailout case the pointer won't be to unique storage. + address float_constant(float f); + address double_constant(double d); + + address int_constant(jlong n); + + bool is_literal_address(LIR_Address* addr); + + // Ensure we have a valid Address (base+offset) to a stack-slot. + Address stack_slot_address(int index, uint shift, int adjust = 0); + + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done); + void add_debug_info_for_branch(address adr, CodeEmitInfo* info); + + void casw(Register addr, Register newval, Register cmpval, bool sign); + void casl(Register addr, Register newval, Register cmpval); + + void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); + + static const int max_tableswitches = 20; + struct tableswitch switches[max_tableswitches]; + int tableswitch_count; + + void init() { tableswitch_count = 0; } + + void deoptimize_trap(CodeEmitInfo *info); + +public: + void store_parameter(Register r, int offset_from_sp_in_words); + void store_parameter(jint c, int offset_from_sp_in_words); + void store_parameter(jobject c, int offset_from_sp_in_words); + + enum { + // call stub: CompiledStaticCall::to_interp_stub_size() + + // NativeInstruction::nop_instruction_size + + // NativeCallTrampolineStub::instruction_size + call_stub_size = 13 * NativeInstruction::nop_instruction_size, + exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), + deopt_handler_size = 7 * NativeInstruction::nop_instruction_size + }; + +#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp new file mode 100644 index 00000000000..ee48326becf --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp @@ -0,0 +1,3377 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "code/compiledIC.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_loongarch.inline.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +NEEDS_CLEANUP // remove this definitions? 
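A note on the local aliases defined above: the assembler headers in this port define the integer argument and temporary registers under prefixed names (RA0..RA7, RT0..RT8), and the #define block maps them back to the conventional ABI names (A0..A7, T0..T8) used throughout this file. The `__` shorthand introduced just below expands to `_masm->`, so, roughly (illustrative snippet, not part of the patch):

    COMMENT("inline cache check");   // emits an assembler block comment in non-PRODUCT builds only
    __ load_klass(SCR2, receiver);   // same as _masm->load_klass(SCR2, receiver)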
+ +#define __ _masm-> + +static void select_different_registers(Register preserve, Register extra, + Register &tmp1, Register &tmp2) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp2 = extra; + } + assert_different_registers(preserve, tmp1, tmp2); +} + +static void select_different_registers(Register preserve, Register extra, + Register &tmp1, Register &tmp2, + Register &tmp3) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp2 = extra; + } else if (tmp3 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp3 = extra; + } + assert_different_registers(preserve, tmp1, tmp2, tmp3); +} + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + +//--------------fpu register translations----------------------- + +address LIR_Assembler::float_constant(float f) { + address const_addr = __ float_constant(f); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +address LIR_Assembler::double_constant(double d) { + address const_addr = __ double_constant(d); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } + +void LIR_Assembler::reset_FPU() { Unimplemented(); } + +void LIR_Assembler::fpop() { Unimplemented(); } + +void LIR_Assembler::fxch(int i) { Unimplemented(); } + +void LIR_Assembler::fld(int i) { Unimplemented(); } + +void LIR_Assembler::ffree(int i) { Unimplemented(); } + +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } + +bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } + +static Register as_reg(LIR_Opr op) { + return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); +} + +static jlong as_long(LIR_Opr data) { + jlong result; + switch (data->type()) { + case T_INT: + result = (data->as_jint()); + break; + case T_LONG: + result = (data->as_jlong()); + break; + default: + ShouldNotReachHere(); + result = 0; // unreachable + } + return result; +} + +Address LIR_Assembler::as_Address(LIR_Address* addr) { + Register base = addr->base()->as_pointer_register(); + LIR_Opr opr = addr->index(); + if (opr->is_cpu_register()) { + Register index; + if (opr->is_single_cpu()) + index = opr->as_register(); + else + index = opr->as_register_lo(); + assert(addr->disp() == 0, "must be"); + return Address(base, index, Address::ScaleFactor(addr->scale())); + } else { + assert(addr->scale() == 0, "must be"); + return Address(base, addr->disp()); + } + return Address(); +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + return as_Address(addr); // Ouch + // FIXME: This needs to be much more clever. See x86. 
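The as_Address() translation above produces one of two machine address forms, depending on whether the LIR address carries an index register; roughly (operand names here are illustrative only):

    // LIR_Address(base, index = T0, scale, disp = 0)    ->  Address(base, T0, ScaleFactor(scale))
    // LIR_Address(base, index = illegal, disp = 16)     ->  Address(base, 16)

A register index combined with a non-zero displacement is rejected by the asserts; only pure base+index or base+displacement accesses are handled here.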
+} + +// Ensure a valid Address (base + offset) to a stack-slot. If stack access is +// not encodable as a base + (immediate) offset, generate an explicit address +// calculation to hold the address in a temporary register. +Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { + precond(size == 4 || size == 8); + Address addr = frame_map()->address_for_slot(index, adjust); + precond(addr.index() == noreg); + precond(addr.base() == SP); + precond(addr.disp() > 0); + uint mask = size - 1; + assert((addr.disp() & mask) == 0, "scaled offsets only"); + return addr; +} + +void LIR_Assembler::osr_entry() { + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + ValueStack* entry_state = osr_entry->state(); + int number_of_locks = entry_state->locks_size(); + + // we jump here if osr happens with the interpreter + // state set up to continue at the beginning of the + // loop that triggered osr - in particular, we have + // the following registers setup: + // + // A2: osr buffer + // + + // build frame + ciMethod* m = compilation()->method(); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + + // OSR buffer is + // + // locals[nlocals-1..0] + // monitors[0..number_of_locks] + // + // locals is a direct copy of the interpreter frame so in the osr buffer + // so first slot in the local array is the last local from the interpreter + // and last slot is local[0] (receiver) from the interpreter + // + // Similarly with locks. The first lock slot in the osr buffer is the nth lock + // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock + // in the interpreter frame (the method lock if a sync method) + + // Initialize monitors in the compiled activation. + // A2: pointer to osr buffer + // + // All other registers are dead at this point and the locals will be + // copied into place by code emitted in the IR. + + Register OSR_buf = osrBufferPointer()->as_pointer_register(); + { + assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); + // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in + // the OSR buffer using 2 word entries: first the lock and then + // the oop. + for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); +#ifdef ASSERT + // verify the interpreter's monitor has a non-null object + { + Label L; + __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); + __ bnez(SCR1, L); + __ stop("locked object is NULL"); + __ bind(L); + } +#endif + __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); + __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); + __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); + __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); + } + } +} + +// inline cache check; done before the frame is built. 
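Before the inline cache check below, here is a standalone sketch of the OSR buffer offset arithmetic that osr_entry() above relies on. The values for max_locals and number_of_locks are made-up examples, BytesPerWord is assumed to be 8 on LoongArch64, and each monitor is assumed to occupy two words in the buffer (lock word, then the object oop), matching the two ld_ptr/st_ptr pairs in the loop above:

    #include <cstdio>

    int main() {
      const int BytesPerWord    = 8;  // 64-bit
      const int max_locals      = 3;  // example only
      const int number_of_locks = 2;  // example only

      // offset of the first (highest) monitor slot, as computed in osr_entry()
      int monitor_offset = BytesPerWord * max_locals +
                           (2 * BytesPerWord) * (number_of_locks - 1);

      for (int i = 0; i < number_of_locks; i++) {
        int slot_offset = monitor_offset - (i * 2) * BytesPerWord;
        // the lock word is read from slot_offset + 0 and the object oop from
        // slot_offset + BytesPerWord before being stored into the compiled frame
        std::printf("monitor %d: lock @ +%d, oop @ +%d\n",
                    i, slot_offset, slot_offset + BytesPerWord);
      }
      return 0;
    }

With the example values this prints monitor 0 at offsets +40/+48 and monitor 1 at +24/+32, i.e. monitors sit above the locals and are walked from the highest slot downwards.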
+int LIR_Assembler::check_icache() { + Register receiver = FrameMap::receiver_opr->as_register(); + Register ic_klass = IC_Klass; + int start_offset = __ offset(); + Label dont; + + __ verify_oop(receiver); + + // explicit NULL check not needed since load from [klass_offset] causes a trap + // check against inline cache + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), + "must add explicit null check"); + + __ load_klass(SCR2, receiver); + __ beq(SCR2, ic_klass, dont); + + // if icache check fails, then jump to runtime routine + // Note: RECEIVER must still contain the receiver! + __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); + + // We align the verified entry point unless the method body + // (including its inline cache check) will fit in a single 64-byte + // icache line. + if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { + // force alignment after the cache check. + __ align(CodeEntryAlignment); + } + + __ bind(dont); + return start_offset; +} + +void LIR_Assembler::jobject2reg(jobject o, Register reg) { + if (o == NULL) { + __ move(reg, R0); + } else { + int oop_index = __ oop_recorder()->find_index(o); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ patchable_li52(reg, (long)o); + } +} + +void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { + address target = NULL; + + switch (patching_id(info)) { + case PatchingStub::access_field_id: + target = Runtime1::entry_for(Runtime1::access_field_patching_id); + break; + case PatchingStub::load_klass_id: + target = Runtime1::entry_for(Runtime1::load_klass_patching_id); + break; + case PatchingStub::load_mirror_id: + target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); + break; + case PatchingStub::load_appendix_id: + target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); + break; + default: ShouldNotReachHere(); + } + + __ call(target, relocInfo::runtime_call_type); + add_call_info_here(info); +} + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { + deoptimize_trap(info); +} + +// This specifies the rsp decrement needed to build the frame +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! 
+ return in_bytes(frame_map()->framesize_in_bytes()); +} + +int LIR_Assembler::emit_exception_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(exception_handler_size); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + // the exception oop and pc are in A0, and A1 + // no other registers need to be preserved, so invalidate them + __ invalidate_registers(false, true, true, true, true, true); + + // check that there is really an exception + __ verify_not_null_oop(A0); + + // search an exception handler (A0: exception oop, A1: throwing pc) + __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); + __ should_not_reach_here(); + guarantee(code_offset() - offset <= exception_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + +// Emit the code to remove the frame from the stack in the exception unwind path. +int LIR_Assembler::emit_unwind_handler() { +#ifndef PRODUCT + if (CommentedAssembly) { + _masm->block_comment("Unwind handler"); + } +#endif + + int offset = code_offset(); + + // Fetch the exception from TLS and clear out exception related thread state + __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset())); + __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset())); + __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset())); + + __ bind(_unwind_handler_entry); + __ verify_not_null_oop(V0); + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ move(S0, V0); // Preserve the exception + } + + // Perform needed unlocking + MonitorExitStub* stub = NULL; + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::a0_opr); + stub = new MonitorExitStub(FrameMap::a0_opr, true, 0); + __ unlock_object(A5, A4, A0, *stub->entry()); + __ bind(*stub->continuation()); + } + + if (compilation()->env()->dtrace_method_probes()) { + __ mov_metadata(A1, method()->constant_encoding()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1); + } + + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ move(A0, S0); // Restore the exception + } + + // remove the activation and dispatch to the unwind handler + __ block_comment("remove_frame and dispatch to the unwind handler"); + __ remove_frame(initial_frame_size_in_bytes()); + __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type); + + // Emit the slow path assembly + if (stub != NULL) { + stub->emit_code(this); + } + + return offset; +} + +int LIR_Assembler::emit_deopt_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(deopt_handler_size); + if (handler_base == NULL) { + // not enough space left 
for the handler + bailout("deopt handler overflow"); + return -1; + } + + int offset = code_offset(); + + __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type); + guarantee(code_offset() - offset <= deopt_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + +void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { + _masm->code_section()->relocate(adr, relocInfo::poll_type); + int pc_offset = code_offset(); + flush_debug_info(pc_offset); + info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); + if (info->exception_handlers() != NULL) { + compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); + } +} + +void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, + "word returns are in V0,"); + + // Pop the stack before the safepoint code + __ remove_frame(initial_frame_size_in_bytes()); + + __ li(SCR2, os::get_polling_page()); + __ relocate(relocInfo::poll_return_type); + __ ld_w(SCR1, SCR2, 0); + __ jr(RA); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { + guarantee(info != NULL, "Shouldn't be NULL"); + __ li(SCR2, os::get_polling_page()); + add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map + __ relocate(relocInfo::poll_type); + __ ld_w(SCR1, SCR2, 0); + return __ offset(); +} + +void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { + __ move(to_reg, from_reg); +} + +void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } + +void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + + switch (c->type()) { + case T_INT: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ li(dest->as_register(), c->as_jint()); + break; + case T_ADDRESS: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ li(dest->as_register(), c->as_jint()); + break; + case T_LONG: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ li(dest->as_register_lo(), (intptr_t)c->as_jlong()); + break; + case T_OBJECT: + if (patch_code == lir_patch_none) { + jobject2reg(c->as_jobject(), dest->as_register()); + } else { + jobject2reg_with_patching(dest->as_register(), info); + } + break; + case T_METADATA: + if (patch_code != lir_patch_none) { + klass2reg_with_patching(dest->as_register(), info); + } else { + __ mov_metadata(dest->as_register(), c->as_metadata()); + } + break; + case T_FLOAT: + __ relocate(relocInfo::internal_word_type); + __ patchable_li52(SCR1, (jlong) float_constant(c->as_jfloat())); + __ fld_s(dest->as_float_reg(), SCR1, 0); + break; + case T_DOUBLE: + __ relocate(relocInfo::internal_word_type); + __ patchable_li52(SCR1, (jlong) double_constant(c->as_jdouble())); + __ fld_d(dest->as_double_reg(), SCR1, 0); + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + LIR_Const* c = src->as_constant_ptr(); + switch (c->type()) { + case T_OBJECT: + if (!c->as_jobject()) + __ st_ptr(R0, frame_map()->address_for_slot(dest->single_stack_ix())); + else { + const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::scr1_opr, dest, c->type(), false); + } + break; + case T_ADDRESS: + const2reg(src, 
FrameMap::scr1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::scr1_opr, dest, c->type(), false); + case T_INT: + case T_FLOAT: + if (c->as_jint_bits() == 0) + __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix())); + else { + __ li(SCR2, c->as_jint_bits()); + __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix())); + } + break; + case T_LONG: + case T_DOUBLE: + if (c->as_jlong_bits() == 0) + __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + else { + __ li(SCR2, (intptr_t)c->as_jlong_bits()); + __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + } + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, + CodeEmitInfo* info, bool wide) { + assert(src->is_constant(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + LIR_Address* to_addr = dest->as_address_ptr(); + + void (Assembler::* insn)(Register Rt, Address adr); + + switch (type) { + case T_ADDRESS: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_d; + break; + case T_LONG: + assert(c->as_jlong() == 0, "should be"); + insn = &Assembler::st_d; + break; + case T_INT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_w; + break; + case T_OBJECT: + case T_ARRAY: + assert(c->as_jobject() == 0, "should be"); + if (UseCompressedOops && !wide) { + insn = &Assembler::st_w; + } else { + insn = &Assembler::st_d; + } + break; + case T_CHAR: + case T_SHORT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_h; + break; + case T_BOOLEAN: + case T_BYTE: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_b; + break; + default: + ShouldNotReachHere(); + insn = &Assembler::st_d; // unreachable + } + + if (info) add_debug_info_for_null_check_here(info); + (_masm->*insn)(R0, as_Address(to_addr)); +} + +void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + // move between cpu-registers + if (dest->is_single_cpu()) { + if (src->type() == T_LONG) { + // Can do LONG -> OBJECT + move_regs(src->as_register_lo(), dest->as_register()); + return; + } + assert(src->is_single_cpu(), "must match"); + if (src->type() == T_OBJECT) { + __ verify_oop(src->as_register()); + } + move_regs(src->as_register(), dest->as_register()); + } else if (dest->is_double_cpu()) { + if (is_reference_type(src->type())) { + // Surprising to me but we can see move of a long to t_object + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; + } + assert(src->is_double_cpu(), "must match"); + Register f_lo = src->as_register_lo(); + Register f_hi = src->as_register_hi(); + Register t_lo = dest->as_register_lo(); + Register t_hi = dest->as_register_hi(); + assert(f_hi == f_lo, "must be same"); + assert(t_hi == t_lo, "must be same"); + move_regs(f_lo, t_lo); + } else if (dest->is_single_fpu()) { + __ fmov_s(dest->as_float_reg(), src->as_float_reg()); + } else if (dest->is_double_fpu()) { + __ fmov_d(dest->as_double_reg(), src->as_double_reg()); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + precond(src->is_register() && dest->is_stack()); + + uint const c_sz32 = sizeof(uint32_t); + uint const c_sz64 = sizeof(uint64_t); + + if (src->is_single_cpu()) 
{ + int index = dest->single_stack_ix(); + if (is_reference_type(type)) { + __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); + __ verify_oop(src->as_register()); + } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { + __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); + } else { + __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); + } + } else if (src->is_double_cpu()) { + int index = dest->double_stack_ix(); + Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); + __ st_ptr(src->as_register_lo(), dest_addr_LO); + } else if (src->is_single_fpu()) { + int index = dest->single_stack_ix(); + __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); + } else if (src->is_double_fpu()) { + int index = dest->double_stack_ix(); + __ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, + CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + PatchingStub* patch = NULL; + Register compressed_src = SCR2; + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (is_reference_type(type)) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { + __ encode_heap_oop(compressed_src, src->as_register()); + } else { + compressed_src = src->as_register(); + } + } + + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: + __ fst_s(src->as_float_reg(), as_Address(to_addr)); + break; + case T_DOUBLE: + __ fst_d(src->as_double_reg(), as_Address(to_addr)); + break; + case T_ARRAY: // fall through + case T_OBJECT: // fall through + if (UseCompressedOops && !wide) { + __ st_w(compressed_src, as_Address(to_addr)); + } else { + __ st_ptr(compressed_src, as_Address(to_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. 
+ ShouldNotReachHere(); + __ st_ptr(src->as_register(), as_Address(to_addr)); + break; + case T_ADDRESS: + __ st_ptr(src->as_register(), as_Address(to_addr)); + break; + case T_INT: + __ st_w(src->as_register(), as_Address(to_addr)); + break; + case T_LONG: + __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); + break; + case T_BYTE: // fall through + case T_BOOLEAN: + __ st_b(src->as_register(), as_Address(to_addr)); + break; + case T_CHAR: // fall through + case T_SHORT: + __ st_h(src->as_register(), as_Address(to_addr)); + break; + default: + ShouldNotReachHere(); + } + if (info != NULL) { + add_debug_info_for_null_check(null_check_here, info); + } +} + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + precond(src->is_stack() && dest->is_register()); + + uint const c_sz32 = sizeof(uint32_t); + uint const c_sz64 = sizeof(uint64_t); + + if (dest->is_single_cpu()) { + int index = src->single_stack_ix(); + if (is_reference_type(type)) { + __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); + __ verify_oop(dest->as_register()); + } else if (type == T_METADATA || type == T_ADDRESS) { + __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); + } else { + __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); + } + } else if (dest->is_double_cpu()) { + int index = src->double_stack_ix(); + Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); + __ ld_ptr(dest->as_register_lo(), src_addr_LO); + } else if (dest->is_single_fpu()) { + int index = src->single_stack_ix(); + __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); + } else if (dest->is_double_fpu()) { + int index = src->double_stack_ix(); + __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { + address target = NULL; + + switch (patching_id(info)) { + case PatchingStub::access_field_id: + target = Runtime1::entry_for(Runtime1::access_field_patching_id); + break; + case PatchingStub::load_klass_id: + target = Runtime1::entry_for(Runtime1::load_klass_patching_id); + break; + case PatchingStub::load_mirror_id: + target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); + break; + case PatchingStub::load_appendix_id: + target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); + break; + default: ShouldNotReachHere(); + } + + __ call(target, relocInfo::runtime_call_type); + add_call_info_here(info); +} + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + LIR_Opr temp; + + if (type == T_LONG || type == T_DOUBLE) + temp = FrameMap::scr1_long_opr; + else + temp = FrameMap::scr1_opr; + + stack2reg(src, temp, src->type()); + reg2stack(temp, dest, dest->type(), false); +} + +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + LIR_Address* addr = src->as_address_ptr(); + LIR_Address* from_addr = src->as_address_ptr(); + + if (addr->base()->type() == T_OBJECT) { + __ verify_oop(addr->base()->as_pointer_register()); + } + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: + __ fld_s(dest->as_float_reg(), as_Address(from_addr)); + break; + case T_DOUBLE: + __ fld_d(dest->as_double_reg(), 
as_Address(from_addr)); + break; + case T_ARRAY: // fall through + case T_OBJECT: // fall through + if (UseCompressedOops && !wide) { + __ ld_wu(dest->as_register(), as_Address(from_addr)); + } else { + __ ld_ptr(dest->as_register(), as_Address(from_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. + ShouldNotReachHere(); + __ ld_ptr(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: + // FIXME: OMG this is a horrible kludge. Any offset from an + // address that matches klass_offset_in_bytes() will be loaded + // as a word, not a long. + if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ ld_wu(dest->as_register(), as_Address(from_addr)); + } else { + __ ld_ptr(dest->as_register(), as_Address(from_addr)); + } + break; + case T_INT: + __ ld_w(dest->as_register(), as_Address(from_addr)); + break; + case T_LONG: + __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); + break; + case T_BYTE: + __ ld_b(dest->as_register(), as_Address(from_addr)); + break; + case T_BOOLEAN: + __ ld_bu(dest->as_register(), as_Address(from_addr)); + break; + case T_CHAR: + __ ld_hu(dest->as_register(), as_Address(from_addr)); + break; + case T_SHORT: + __ ld_h(dest->as_register(), as_Address(from_addr)); + break; + default: + ShouldNotReachHere(); + } + + if (is_reference_type(type)) { + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } + + // Load barrier has not yet been applied, so ZGC can't verify the oop here + __ verify_oop(dest->as_register()); + } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { + if (UseCompressedClassPointers) { + __ decode_klass_not_null(dest->as_register()); + } + } +} + +void LIR_Assembler::prefetchr(LIR_Opr src) { Unimplemented(); } + +void LIR_Assembler::prefetchw(LIR_Opr src) { Unimplemented(); } + +int LIR_Assembler::array_element_size(BasicType type) const { + int elem_size = type2aelembytes(type); + return exact_log2(elem_size); +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { + case lir_idiv: + case lir_irem: + arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), + op->result_opr(), op->info()); + break; + default: + ShouldNotReachHere(); + break; + } +} + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + assert(op->cond() == lir_cond_always, "must be"); +#endif + + if (op->info() != NULL) + add_debug_info_for_branch(op->info()); + + __ b_far(*(op->label())); +} + +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); +#endif + + if (op->info() != NULL) { + assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), + "shouldn't be codeemitinfo for non-address operands"); + add_debug_info_for_null_check_here(op->info()); // exception possible + } + + Label& L = *(op->label()); + Assembler::Condition acond; + LIR_Opr opr1 = op->in_opr1(); + LIR_Opr opr2 = 
op->in_opr2(); + assert(op->condition() != lir_cond_always, "must be"); + + if (op->code() == lir_cmp_float_branch) { + bool is_unordered = (op->ublock() == op->block()); + if (opr1->is_single_fpu()) { + FloatRegister reg1 = opr1->as_float_reg(); + assert(opr2->is_single_fpu(), "expect single float register"); + FloatRegister reg2 = opr2->as_float_reg(); + switch(op->condition()) { + case lir_cond_equal: + if (is_unordered) + __ fcmp_cueq_s(FCC0, reg1, reg2); + else + __ fcmp_ceq_s(FCC0, reg1, reg2); + break; + case lir_cond_notEqual: + if (is_unordered) + __ fcmp_cune_s(FCC0, reg1, reg2); + else + __ fcmp_cne_s(FCC0, reg1, reg2); + break; + case lir_cond_less: + if (is_unordered) + __ fcmp_cult_s(FCC0, reg1, reg2); + else + __ fcmp_clt_s(FCC0, reg1, reg2); + break; + case lir_cond_lessEqual: + if (is_unordered) + __ fcmp_cule_s(FCC0, reg1, reg2); + else + __ fcmp_cle_s(FCC0, reg1, reg2); + break; + case lir_cond_greaterEqual: + if (is_unordered) + __ fcmp_cule_s(FCC0, reg2, reg1); + else + __ fcmp_cle_s(FCC0, reg2, reg1); + break; + case lir_cond_greater: + if (is_unordered) + __ fcmp_cult_s(FCC0, reg2, reg1); + else + __ fcmp_clt_s(FCC0, reg2, reg1); + break; + default: + ShouldNotReachHere(); + } + } else if (opr1->is_double_fpu()) { + FloatRegister reg1 = opr1->as_double_reg(); + assert(opr2->is_double_fpu(), "expect double float register"); + FloatRegister reg2 = opr2->as_double_reg(); + switch(op->condition()) { + case lir_cond_equal: + if (is_unordered) + __ fcmp_cueq_d(FCC0, reg1, reg2); + else + __ fcmp_ceq_d(FCC0, reg1, reg2); + break; + case lir_cond_notEqual: + if (is_unordered) + __ fcmp_cune_d(FCC0, reg1, reg2); + else + __ fcmp_cne_d(FCC0, reg1, reg2); + break; + case lir_cond_less: + if (is_unordered) + __ fcmp_cult_d(FCC0, reg1, reg2); + else + __ fcmp_clt_d(FCC0, reg1, reg2); + break; + case lir_cond_lessEqual: + if (is_unordered) + __ fcmp_cule_d(FCC0, reg1, reg2); + else + __ fcmp_cle_d(FCC0, reg1, reg2); + break; + case lir_cond_greaterEqual: + if (is_unordered) + __ fcmp_cule_d(FCC0, reg2, reg1); + else + __ fcmp_cle_d(FCC0, reg2, reg1); + break; + case lir_cond_greater: + if (is_unordered) + __ fcmp_cult_d(FCC0, reg2, reg1); + else + __ fcmp_clt_d(FCC0, reg2, reg1); + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + __ bcnez(FCC0, L); + } else { + if (opr1->is_constant() && opr2->is_single_cpu()) { + // tableswitch + Unimplemented(); + } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { + Register reg1 = as_reg(opr1); + Register reg2 = noreg; + jlong imm2 = 0; + if (opr2->is_single_cpu()) { + // cpu register - cpu register + reg2 = opr2->as_register(); + } else if (opr2->is_double_cpu()) { + // cpu register - cpu register + reg2 = opr2->as_register_lo(); + } else if (opr2->is_constant()) { + switch(opr2->type()) { + case T_INT: + case T_ADDRESS: + imm2 = opr2->as_constant_ptr()->as_jint(); + break; + case T_LONG: + imm2 = opr2->as_constant_ptr()->as_jlong(); + break; + case T_METADATA: + imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); + break; + case T_OBJECT: + case T_ARRAY: + if (opr2->as_constant_ptr()->as_jobject() != NULL) { + reg2 = SCR1; + jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); + } else { + reg2 = R0; + } + break; + default: + ShouldNotReachHere(); + break; + } + } else { + ShouldNotReachHere(); + } + if (reg2 == noreg) { + if (imm2 == 0) { + reg2 = R0; + } else { + reg2 = SCR1; + __ li(reg2, imm2); + } + } + switch (op->condition()) { + case lir_cond_equal: + __ beq_far(reg1, reg2, L); 
break; + case lir_cond_notEqual: + __ bne_far(reg1, reg2, L); break; + case lir_cond_less: + __ blt_far(reg1, reg2, L, true); break; + case lir_cond_lessEqual: + __ bge_far(reg2, reg1, L, true); break; + case lir_cond_greaterEqual: + __ bge_far(reg1, reg2, L, true); break; + case lir_cond_greater: + __ blt_far(reg2, reg1, L, true); break; + case lir_cond_belowEqual: + __ bge_far(reg2, reg1, L, false); break; + case lir_cond_aboveEqual: + __ bge_far(reg1, reg2, L, false); break; + default: + ShouldNotReachHere(); + } + } + } +} + +void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); + LIR_Opr tmp = op->tmp(); + + switch (op->bytecode()) { + case Bytecodes::_i2f: + __ movgr2fr_w(dest->as_float_reg(), src->as_register()); + __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); + break; + case Bytecodes::_i2d: + __ movgr2fr_w(dest->as_double_reg(), src->as_register()); + __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); + break; + case Bytecodes::_l2d: + __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); + __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); + break; + case Bytecodes::_l2f: + __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); + __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); + break; + case Bytecodes::_f2d: + __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); + break; + case Bytecodes::_d2f: + __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); + break; + case Bytecodes::_i2c: + __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); + break; + case Bytecodes::_i2l: + _masm->block_comment("FIXME: This could be a no-op"); + __ slli_w(dest->as_register_lo(), src->as_register(), 0); + break; + case Bytecodes::_i2s: + __ ext_w_h(dest->as_register(), src->as_register()); + break; + case Bytecodes::_i2b: + __ ext_w_b(dest->as_register(), src->as_register()); + break; + case Bytecodes::_l2i: + __ slli_w(dest->as_register(), src->as_register_lo(), 0); + break; + case Bytecodes::_d2l: + __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); + __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); + break; + case Bytecodes::_f2i: + __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); + __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); + break; + case Bytecodes::_f2l: + __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); + __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); + break; + case Bytecodes::_d2i: + __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); + __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); + break; + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { + if (op->init_check()) { + __ ld_bu(SCR1, Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); + __ li(SCR2, InstanceKlass::fully_initialized); + add_debug_info_for_null_check_here(op->stub()->info()); + __ bne_far(SCR1, SCR2, *op->stub()->entry()); + } + __ allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), + op->tmp2()->as_register(), op->header_size(), + op->object_size(), op->klass()->as_register(), + *op->stub()->entry()); + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { + Register len = op->len()->as_register(); + if (UseSlowPath || + (!UseFastNewObjectArray && is_reference_type(op->type())) || + (!UseFastNewTypeArray && !is_reference_type(op->type()))) { + __ b(*op->stub()->entry()); + 
} else { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + Register tmp3 = op->tmp3()->as_register(); + if (len == tmp1) { + tmp1 = tmp3; + } else if (len == tmp2) { + tmp2 = tmp3; + } else if (len == tmp3) { + // everything is ok + } else { + __ move(tmp3, len); + } + __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, + arrayOopDesc::header_size(op->type()), + array_element_size(op->type()), + op->klass()->as_register(), + *op->stub()->entry()); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done) { + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + // See if the receiver is receiver[n]. + __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ ld_ptr(SCR1, Address(SCR2)); + __ bne(recv, SCR1, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); + __ ld_ptr(SCR2, data_addr); + __ addi_d(SCR2, SCR2, DataLayout::counter_increment); + __ st_ptr(SCR2, data_addr); + __ b(*update_done); + __ bind(next_test); + } + + // Didn't find receiver; find next empty slot and fill it in + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + Address recv_addr(SCR2); + __ ld_ptr(SCR1, recv_addr); + __ bnez(SCR1, next_test); + __ st_ptr(recv, recv_addr); + __ li(SCR1, DataLayout::counter_increment); + __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ st_ptr(SCR1, Address(SCR2)); + __ b(*update_done); + __ bind(next_test); + } +} + +void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, + Label* failure, Label* obj_is_null) { + // we always need a stub for the failure case. + CodeStub* stub = op->stub(); + Register obj = op->object()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register dst = op->result_opr()->as_register(); + ciKlass* k = op->klass(); + Register Rtmp1 = noreg; + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + const bool should_profile = op->should_profile(); + + if (should_profile) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + + Label profile_cast_success, profile_cast_failure; + Label *success_target = should_profile ? &profile_cast_success : success; + Label *failure_target = should_profile ? 
&profile_cast_failure : failure; + + if (obj == k_RInfo) { + k_RInfo = dst; + } else if (obj == klass_RInfo) { + klass_RInfo = dst; + } + if (k->is_loaded() && !UseCompressedClassPointers) { + select_different_registers(obj, dst, k_RInfo, klass_RInfo); + } else { + Rtmp1 = op->tmp3()->as_register(); + select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); + } + + assert_different_registers(obj, k_RInfo, klass_RInfo); + + if (should_profile) { + Label not_null; + __ bnez(obj, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ ld_bu(SCR2, data_addr); + __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); + __ st_b(SCR2, data_addr); + __ b(*obj_is_null); + __ bind(not_null); + } else { + __ beqz(obj, *obj_is_null); + } + + if (!k->is_loaded()) { + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + } else { + __ mov_metadata(k_RInfo, k->constant_encoding()); + } + __ verify_oop(obj); + + if (op->fast_check()) { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(SCR2, obj); + __ bne_far(SCR2, k_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } else { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit + __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ bne_far(k_RInfo, SCR1, *failure_target); + // successful cast, fall through to profile or jump + } else { + // See if we get an immediate positive hit + __ beq_far(k_RInfo, SCR1, *success_target); + // check for self + __ beq_far(klass_RInfo, k_RInfo, *success_target); + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + // result is a boolean + __ beqz(klass_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } else { + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + // result is a boolean + __ beqz(k_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } + if (should_profile) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, obj); + Label update_done; + type_profile_helper(mdo, md, data, recv, success); + __ b(*success); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr = Address(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld_ptr(SCR2, counter_addr); + __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); + __ st_ptr(SCR2, counter_addr); + __ b(*failure); + } + __ b(*success); +} + +void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { + const bool should_profile = op->should_profile(); + + LIR_Code code = op->code(); + if (code == lir_store_check) { + Register value = op->object()->as_register(); + Register array = op->array()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register Rtmp1 = op->tmp3()->as_register(); + CodeStub* stub = op->stub(); + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + if (should_profile) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + Label profile_cast_success, profile_cast_failure, done; + Label *success_target = should_profile ? &profile_cast_success : &done; + Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); + + if (should_profile) { + Label not_null; + __ bnez(value, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ ld_bu(SCR2, data_addr); + __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); + __ st_b(SCR2, data_addr); + __ b(done); + __ bind(not_null); + } else { + __ beqz(value, done); + } + + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(k_RInfo, array); + __ load_klass(klass_RInfo, value); + + // get instance klass (it's already uncompressed) + __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + // result is a boolean + __ beqz(k_RInfo, *failure_target); + // fall through to the success case + + if (should_profile) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, value); + Label update_done; + type_profile_helper(mdo, md, data, recv, &done); + __ b(done); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ lea(SCR2, counter_addr); + __ ld_ptr(SCR1, Address(SCR2)); + __ addi_d(SCR1, SCR1, -DataLayout::counter_increment); + __ st_ptr(SCR1, Address(SCR2)); + __ b(*stub->entry()); + } + + __ bind(done); + } else if (code == lir_checkcast) { + Register obj = op->object()->as_register(); + Register dst = 
op->result_opr()->as_register(); + Label success; + emit_typecheck_helper(op, &success, op->stub()->entry(), &success); + __ bind(success); + if (dst != obj) { + __ move(dst, obj); + } + } else if (code == lir_instanceof) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success, failure, done; + emit_typecheck_helper(op, &success, &failure, &failure); + __ bind(failure); + __ move(dst, R0); + __ b(done); + __ bind(success); + __ li(dst, 1); + __ bind(done); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { + __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, + /* retold */ false, /* barrier */ true); +} + +void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { + __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, + /* retold */ false, /* barrier */ true); +} + +void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + assert(VM_Version::supports_cx8(), "wrong machine"); + Register addr; + if (op->addr()->is_register()) { + addr = as_reg(op->addr()); + } else { + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); + assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); + Register cmpval = as_reg(op->cmp_value()); + + if (op->code() == lir_cas_obj) { + if (UseCompressedOops) { + Register t1 = op->tmp1()->as_register(); + assert(op->tmp1()->is_valid(), "must be"); + __ encode_heap_oop(t1, cmpval); + cmpval = t1; + __ encode_heap_oop(SCR2, newval); + newval = SCR2; + casw(addr, newval, cmpval, false); + } else { + casl(addr, newval, cmpval); + } + } else if (op->code() == lir_cas_int) { + casw(addr, newval, cmpval, true); + } else { + casl(addr, newval, cmpval); + } +} + +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, + LIR_Opr result, BasicType type) { + Unimplemented(); +} + +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, + LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); + assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); + Register regd = (result->type() == T_LONG) ? 
result->as_register_lo() : result->as_register(); + Register regl = as_reg(left); + Register regr = noreg; + Register reg1 = noreg; + Register reg2 = noreg; + jlong immr = 0; + + // comparison operands + if (right->is_single_cpu()) { + // cpu register - cpu register + regr = right->as_register(); + } else if (right->is_double_cpu()) { + // cpu register - cpu register + regr = right->as_register_lo(); + } else if (right->is_constant()) { + switch(right->type()) { + case T_INT: + case T_ADDRESS: + immr = right->as_constant_ptr()->as_jint(); + break; + case T_LONG: + immr = right->as_constant_ptr()->as_jlong(); + break; + case T_METADATA: + immr = (intptr_t)right->as_constant_ptr()->as_metadata(); + break; + case T_OBJECT: + case T_ARRAY: + if (right->as_constant_ptr()->as_jobject() != NULL) { + regr = SCR1; + jobject2reg(right->as_constant_ptr()->as_jobject(), regr); + } else { + immr = 0; + } + break; + default: + ShouldNotReachHere(); + break; + } + } else { + ShouldNotReachHere(); + } + + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + case lir_cond_notEqual: + if (!Assembler::is_simm(-immr, 12)) { + regr = SCR1; + __ li(regr, immr); + } + break; + default: + if (!Assembler::is_simm(immr, 12)) { + regr = SCR1; + __ li(regr, immr); + } + } + } + + // special cases + if (src1->is_constant() && src2->is_constant()) { + jlong val1 = 0, val2 = 0; + if (src1->type() == T_INT && src2->type() == T_INT) { + val1 = src1->as_jint(); + val2 = src2->as_jint(); + } else if (src1->type() == T_LONG && src2->type() == T_LONG) { + val1 = src1->as_jlong(); + val2 = src2->as_jlong(); + } + if (val1 == 0 && val2 == 1) { + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + } + break; + case lir_cond_notEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + __ xori(regd, regd, 1); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + } + break; + case lir_cond_less: + __ slti(regd, regl, immr); + __ xori(regd, regd, 1); + break; + case lir_cond_lessEqual: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + break; + case lir_cond_greater: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + __ xori(regd, regd, 1); + break; + case lir_cond_greaterEqual: + __ slti(regd, regl, immr); + break; + case lir_cond_belowEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ sltu(regd, SCR1, regl); + } + break; + case lir_cond_aboveEqual: + __ sltui(regd, regl, immr); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (condition) { + case lir_cond_equal: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + break; + case lir_cond_notEqual: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + break; + case lir_cond_less: + __ slt(regd, regl, regr); + __ xori(regd, regd, 1); + break; + case lir_cond_lessEqual: + __ slt(regd, regr, regl); + break; + case lir_cond_greater: + __ slt(regd, regr, regl); + __ xori(regd, regd, 1); + break; + case lir_cond_greaterEqual: + __ slt(regd, regl, regr); + break; + case lir_cond_belowEqual: + __ sltu(regd, regr, regl); + break; + case lir_cond_aboveEqual: + __ sltu(regd, regl, regr); + break; + default: + ShouldNotReachHere(); + } + } + return; + } else 
if (val1 == 1 && val2 == 0) { + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + if (immr == 0) { + __ sltu(regd, R0, regl); + __ xori(regd, regd, 1); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + } + break; + case lir_cond_notEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + } + break; + case lir_cond_less: + __ slti(regd, regl, immr); + break; + case lir_cond_lessEqual: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + __ xori(regd, regd, 1); + break; + case lir_cond_greater: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + break; + case lir_cond_greaterEqual: + __ slti(regd, regl, immr); + __ xori(regd, regd, 1); + break; + case lir_cond_belowEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ sltu(regd, SCR1, regl); + } + __ xori(regd, regd, 1); + break; + case lir_cond_aboveEqual: + __ sltui(regd, regl, immr); + __ xori(regd, regd, 1); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (condition) { + case lir_cond_equal: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + break; + case lir_cond_notEqual: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + break; + case lir_cond_less: + __ slt(regd, regl, regr); + break; + case lir_cond_lessEqual: + __ slt(regd, regr, regl); + __ xori(regd, regd, 1); + break; + case lir_cond_greater: + __ slt(regd, regr, regl); + break; + case lir_cond_greaterEqual: + __ slt(regd, regl, regr); + __ xori(regd, regd, 1); + break; + case lir_cond_belowEqual: + __ sltu(regd, regr, regl); + __ xori(regd, regd, 1); + break; + case lir_cond_aboveEqual: + __ sltu(regd, regl, regr); + __ xori(regd, regd, 1); + break; + default: + ShouldNotReachHere(); + } + } + return; + } + } + + // cmp + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + __ addi_d(SCR2, regl, -immr); + break; + case lir_cond_notEqual: + __ addi_d(SCR2, regl, -immr); + break; + case lir_cond_less: + __ slti(SCR2, regl, immr); + break; + case lir_cond_lessEqual: + __ li(SCR1, immr); + __ slt(SCR2, SCR1, regl); + break; + case lir_cond_greater: + __ li(SCR1, immr); + __ slt(SCR2, SCR1, regl); + break; + case lir_cond_greaterEqual: + __ slti(SCR2, regl, immr); + break; + case lir_cond_belowEqual: + __ li(SCR1, immr); + __ sltu(SCR2, SCR1, regl); + break; + case lir_cond_aboveEqual: + __ sltui(SCR2, regl, immr); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (condition) { + case lir_cond_equal: + __ sub_d(SCR2, regl, regr); + break; + case lir_cond_notEqual: + __ sub_d(SCR2, regl, regr); + break; + case lir_cond_less: + __ slt(SCR2, regl, regr); + break; + case lir_cond_lessEqual: + __ slt(SCR2, regr, regl); + break; + case lir_cond_greater: + __ slt(SCR2, regr, regl); + break; + case lir_cond_greaterEqual: + __ slt(SCR2, regl, regr); + break; + case lir_cond_belowEqual: + __ sltu(SCR2, regr, regl); + break; + case lir_cond_aboveEqual: + __ sltu(SCR2, regl, regr); + break; + default: + ShouldNotReachHere(); + } + } + + // value operands + if (src1->is_stack()) { + stack2reg(src1, result, result->type()); + reg1 = regd; + } else if (src1->is_constant()) { + const2reg(src1, result, lir_patch_none, NULL); + reg1 = regd; + } else { + reg1 = (src1->type() == T_LONG) 
? src1->as_register_lo() : src1->as_register(); + } + + if (src2->is_stack()) { + stack2reg(src2, FrameMap::scr1_opr, result->type()); + reg2 = SCR1; + } else if (src2->is_constant()) { + LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; + const2reg(src2, tmp, lir_patch_none, NULL); + reg2 = SCR1; + } else { + reg2 = (src2->type() == T_LONG) ? src2->as_register_lo() : src2->as_register(); + } + + // cmove + switch (condition) { + case lir_cond_equal: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_notEqual: + __ maskeqz(regd, reg1, SCR2); + __ masknez(SCR2, reg2, SCR2); + break; + case lir_cond_less: + __ maskeqz(regd, reg1, SCR2); + __ masknez(SCR2, reg2, SCR2); + break; + case lir_cond_lessEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_greater: + __ maskeqz(regd, reg1, SCR2); + __ masknez(SCR2, reg2, SCR2); + break; + case lir_cond_greaterEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_belowEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_aboveEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + default: + ShouldNotReachHere(); + } + + __ OR(regd, regd, SCR2); +} + +void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + + if (left->is_single_cpu()) { + Register lreg = left->as_register(); + Register dreg = as_reg(dest); + + if (right->is_single_cpu()) { + // cpu register - cpu register + assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); + Register rreg = right->as_register(); + switch (code) { + case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; + case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; + case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_double_cpu()) { + Register rreg = right->as_register_lo(); + // single_cpu + double_cpu: can happen with obj+long + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + switch (code) { + case lir_add: __ add_d(dreg, lreg, rreg); break; + case lir_sub: __ sub_d(dreg, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_constant()) { + // cpu register - constant + jlong c; + + // FIXME: This is fugly: we really need to factor all this logic. 
+ switch(right->type()) { + case T_LONG: + c = right->as_constant_ptr()->as_jlong(); + break; + case T_INT: + case T_ADDRESS: + c = right->as_constant_ptr()->as_jint(); + break; + default: + ShouldNotReachHere(); + c = 0; // unreachable + break; + } + + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + if (c == 0 && dreg == lreg) { + COMMENT("effective nop elided"); + return; + } + + switch(left->type()) { + case T_INT: + switch (code) { + case lir_add: __ addi_w(dreg, lreg, c); break; + case lir_sub: __ addi_w(dreg, lreg, -c); break; + default: ShouldNotReachHere(); + } + break; + case T_OBJECT: + case T_ADDRESS: + switch (code) { + case lir_add: __ addi_d(dreg, lreg, c); break; + case lir_sub: __ addi_d(dreg, lreg, -c); break; + default: ShouldNotReachHere(); + } + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + } else if (left->is_double_cpu()) { + Register lreg_lo = left->as_register_lo(); + + if (right->is_double_cpu()) { + // cpu register - cpu register + Register rreg_lo = right->as_register_lo(); + switch (code) { + case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + default: ShouldNotReachHere(); + } + + } else if (right->is_constant()) { + jlong c = right->as_constant_ptr()->as_jlong(); + Register dreg = as_reg(dest); + switch (code) { + case lir_add: + case lir_sub: + if (c == 0 && dreg == lreg_lo) { + COMMENT("effective nop elided"); + return; + } + code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); + break; + case lir_div: + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ move(dreg, lreg_lo); + } else { + unsigned int shift = exact_log2(c); + // use scr1 as intermediate result register + __ srai_d(SCR1, lreg_lo, 63); + __ srli_d(SCR1, SCR1, 64 - shift); + __ add_d(SCR1, lreg_lo, SCR1); + __ srai_d(dreg, SCR1, shift); + } + break; + case lir_rem: + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ move(dreg, R0); + } else { + // use scr1/2 as intermediate result register + __ sub_d(SCR1, R0, lreg_lo); + __ slt(SCR2, SCR1, R0); + __ andi(dreg, lreg_lo, c - 1); + __ andi(SCR1, SCR1, c - 1); + __ sub_d(SCR1, R0, SCR1); + __ maskeqz(dreg, dreg, SCR2); + __ masknez(SCR1, SCR1, SCR2); + __ OR(dreg, dreg, SCR1); + } + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + } else if (left->is_single_fpu()) { + assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); + switch (code) { + case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: ShouldNotReachHere(); + } + } else if (left->is_double_fpu()) { + if (right->is_double_fpu()) { + // fpu register - fpu register + switch (code) { + case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: ShouldNotReachHere(); + } + } else { + if (right->is_constant()) { + ShouldNotReachHere(); + } + ShouldNotReachHere(); + } + } else if (left->is_single_stack() || left->is_address()) { + assert(left == dest, "left and dest must be equal"); + ShouldNotReachHere(); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, + int dest_index, bool pop_fpu_stack) { + Unimplemented(); +} + +void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { + switch(code) { + case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; + case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; + default : ShouldNotReachHere(); + } +} + +void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { + assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); + Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); + + if (dst->is_single_cpu()) { + Register Rdst = dst->as_register(); + if (right->is_constant()) { + switch (code) { + case lir_logic_and: + if (Assembler::is_uimm(right->as_jint(), 12)) { + __ andi(Rdst, Rleft, right->as_jint()); + } else { + __ li(AT, right->as_jint()); + __ AND(Rdst, Rleft, AT); + } + break; + case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; + case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; + default: ShouldNotReachHere(); break; + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); + switch (code) { + case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; + case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; + case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; + default: ShouldNotReachHere(); break; + } + } + } else { + Register Rdst = dst->as_register_lo(); + if (right->is_constant()) { + switch (code) { + case lir_logic_and: + if (Assembler::is_uimm(right->as_jlong(), 12)) { + __ andi(Rdst, Rleft, right->as_jlong()); + } else { + // We can guarantee that transform from HIR LogicOp is in range of + // uimm(12), but the common code directly generates LIR LogicAnd, + // and the right-operand is mask with all ones in the high bits. + __ li(AT, right->as_jlong()); + __ AND(Rdst, Rleft, AT); + } + break; + case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; + case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; + default: ShouldNotReachHere(); break; + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); + switch (code) { + case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; + case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; + case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; + default: ShouldNotReachHere(); break; + } + } + } +} + +void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, + LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { + // opcode check + assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); + bool is_irem = (code == lir_irem); + + // operand check + assert(left->is_single_cpu(), "left must be register"); + assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); + assert(result->is_single_cpu(), "result must be register"); + Register lreg = left->as_register(); + Register dreg = result->as_register(); + + // power-of-2 constant check and codegen + if (right->is_constant()) { + int c = right->as_constant_ptr()->as_jint(); + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (is_irem) { + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ move(dreg, R0); + } else { + // use scr1/2 as intermediate result register + __ sub_w(SCR1, R0, lreg); + __ slt(SCR2, SCR1, R0); + __ andi(dreg, lreg, c - 1); + __ andi(SCR1, SCR1, c - 1); + __ sub_w(SCR1, R0, SCR1); + __ maskeqz(dreg, dreg, SCR2); + __ masknez(SCR1, SCR1, SCR2); + __ OR(dreg, dreg, SCR1); + } + } else { + if (c == 1) { + // move lreg to dreg if divisor is 1 + __ move(dreg, lreg); + } else { + unsigned int shift = exact_log2(c); + // use scr1 as intermediate result register + __ srai_w(SCR1, lreg, 31); + __ srli_w(SCR1, SCR1, 32 - shift); + __ add_w(SCR1, lreg, SCR1); + __ srai_w(dreg, SCR1, shift); + } + } + } else { + Register rreg = right->as_register(); + if (is_irem) + __ mod_w(dreg, lreg, rreg); + else + __ div_w(dreg, lreg, rreg); + } +} 
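+// Note on the power-of-2 special cases above (arithmetic_idiv, and the analogous
+// T_LONG constant cases in arith_op): the lir_idiv sequence srai/srli/add/srai
+// adds (c - 1) to a negative dividend before the arithmetic shift, roughly
+//   dreg = (lreg + ((lreg >> 31) >>> (32 - shift))) >> shift
+// which yields the round-toward-zero quotient Java requires. The lir_irem
+// sequence uses maskeqz/masknez to select, without branches, between
+// lreg & (c - 1) for a positive dividend and -((-lreg) & (c - 1)) otherwise,
+// i.e. the truncated remainder.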
+ +void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { + Unimplemented(); +} + +void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ + if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { + bool is_unordered_less = (code == lir_ucmp_fd2i); + if (left->is_single_fpu()) { + if (is_unordered_less) { + __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); + __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); + } else { + __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); + __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); + } + } else if (left->is_double_fpu()) { + if (is_unordered_less) { + __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); + __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); + } else { + __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); + __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); + } + } else { + ShouldNotReachHere(); + } + __ movcf2gr(dst->as_register(), FCC0); + __ movcf2gr(SCR1, FCC1); + __ sub_d(dst->as_register(), dst->as_register(), SCR1); + } else if (code == lir_cmp_l2i) { + __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); + __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); + __ sub_d(dst->as_register(), dst->as_register(), SCR1); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::align_call(LIR_Code code) {} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); + if (call == NULL) { + bailout("trampoline stub overflow"); + return; + } + add_call_info(code_offset(), op->info()); +} + +void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + address call = __ ic_call(op->addr()); + if (call == NULL) { + bailout("trampoline stub overflow"); + return; + } + add_call_info(code_offset(), op->info()); +} + +/* Currently, vtable-dispatch is only enabled for sparc platforms */ +void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { + ShouldNotReachHere(); +} + +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + address stub = __ start_a_stub(call_stub_size); + if (stub == NULL) { + bailout("static call stub overflow"); + return; + } + + int start = __ offset(); + + __ relocate(static_stub_Relocation::spec(call_pc)); + + // Code stream for loading method may be changed. + __ ibar(0); + + // Rmethod contains Method*, it should be relocated for GC + // static stub relocation also tags the Method* in the code-stream. + __ mov_metadata(Rmethod, NULL); + // This is recognized as unresolved by relocs/nativeInst/ic code + __ patchable_jump(__ pc()); + + assert(__ offset() - start <= call_stub_size, "stub too big"); + __ end_a_stub(); +} + +void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { + assert(exceptionOop->as_register() == A0, "must match"); + assert(exceptionPC->as_register() == A1, "must match"); + + // exception object is not added to oop map by LinearScan + // (LinearScan assumes that no oops are in fixed registers) + info->add_register_oop(exceptionOop); + Runtime1::StubID unwind_id; + + // get current pc information + // pc is only needed if the method has an exception handler, the unwind code does not need it. 
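+  // Below, label L is bound at the lipc instruction itself, so lipc loads the
+  // throwing pc into the exception-pc register and add_call_info records the
+  // same code offset for the exception-handler lookup.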
+ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { + // As no instructions have been generated yet for this LIR node it's + // possible that an oop map already exists for the current offset. + // In that case insert an dummy NOP here to ensure all oop map PCs + // are unique. See JDK-8237483. + __ nop(); + } + Label L; + int pc_for_athrow_offset = __ offset(); + __ bind(L); + __ lipc(exceptionPC->as_register(), L); + add_call_info(pc_for_athrow_offset, info); // for exception handler + + __ verify_not_null_oop(A0); + // search an exception handler (A0: exception oop, A1: throwing pc) + if (compilation()->has_fpu_code()) { + unwind_id = Runtime1::handle_exception_id; + } else { + unwind_id = Runtime1::handle_exception_nofpu_id; + } + __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type); + + // FIXME: enough room for two byte trap ???? + __ nop(); +} + +void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { + assert(exceptionOop->as_register() == A0, "must match"); + __ b(_unwind_handler_entry); +} + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); + + switch (left->type()) { + case T_INT: { + switch (code) { + case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break; + case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break; + case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break; + default: ShouldNotReachHere(); break; + } + break; + case T_LONG: + case T_ADDRESS: + case T_OBJECT: + switch (code) { + case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break; + case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break; + case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break; + default: ShouldNotReachHere(); break; + } + break; + default: + ShouldNotReachHere(); + break; + } + } +} + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); + Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); + + switch (left->type()) { + case T_INT: { + switch (code) { + case lir_shl: __ slli_w(dreg, lreg, count); break; + case lir_shr: __ srai_w(dreg, lreg, count); break; + case lir_ushr: __ srli_w(dreg, lreg, count); break; + default: ShouldNotReachHere(); break; + } + break; + case T_LONG: + case T_ADDRESS: + case T_OBJECT: + switch (code) { + case lir_shl: __ slli_d(dreg, lreg, count); break; + case lir_shr: __ srai_d(dreg, lreg, count); break; + case lir_ushr: __ srli_d(dreg, lreg, count); break; + default: ShouldNotReachHere(); break; + } + break; + default: + ShouldNotReachHere(); + break; + } + } +} + +void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); +} + +void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ li(SCR2, c); + __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); +} + +void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { + ShouldNotReachHere(); +} + +// This code replaces a call to arraycopy; no exception may +// be thrown in this code, they must be thrown in the System.arraycopy +// activation frame; we could save some checks if this would not be the case +void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + Register j_rarg0 = T0; + Register j_rarg1 = A0; + Register j_rarg2 = A1; + Register j_rarg3 = A2; + Register j_rarg4 = A3; + + ciArrayKlass* default_type = op->expected_type(); + Register src = op->src()->as_register(); + Register dst = op->dst()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; + if (is_reference_type(basic_type)) + basic_type = T_OBJECT; + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL) { + Label done; + assert(src == T0 && src_pos == A0, "mismatch in calling convention"); + + // Save the arguments in case the generic arraycopy fails and we + // have to fall back to the JNI stub + __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ st_ptr(length, Address(SP, 2 * BytesPerWord)); + __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ st_ptr(src, Address(SP, 4 * BytesPerWord)); + + address copyfunc_addr = StubRoutines::generic_arraycopy(); + + // FIXME: LA + if (copyfunc_addr == NULL) { + // Take a slow path for generic arraycopy. 
+ __ b(*stub->entry()); + __ bind(*stub->continuation()); + return; + } + + // The arguments are in java calling convention so we shift them + // to C convention + assert_different_registers(A0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); + __ move(A0, j_rarg0); + assert_different_registers(A1, j_rarg2, j_rarg3, j_rarg4); + __ move(A1, j_rarg1); + assert_different_registers(A2, j_rarg3, j_rarg4); + __ move(A2, j_rarg2); + assert_different_registers(A3, j_rarg4); + __ move(A3, j_rarg3); + __ move(A4, j_rarg4); +#ifndef PRODUCT + if (PrintC1Statistics) { + __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); + __ increment(SCR2, 1); + } +#endif + __ call(copyfunc_addr, relocInfo::runtime_call_type); + + __ beqz(A0, *stub->continuation()); + + // Reload values from the stack so they are where the stub + // expects them. + __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); + __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); + + // A0 is -1^K where K == partial copied count + __ nor(SCR1, A0, R0); + __ slli_w(SCR1, SCR1, 0); + // adjust length down and src/end pos up by partial copied count + __ sub_w(length, length, SCR1); + __ add_w(src_pos, src_pos, SCR1); + __ add_w(dst_pos, dst_pos, SCR1); + __ b(*stub->entry()); + + __ bind(*stub->continuation()); + return; + } + + assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), + "must be true at this point"); + + int elem_size = type2aelembytes(basic_type); + Address::ScaleFactor scale = Address::times(elem_size); + + Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); + Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); + Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); + Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); + + // test for NULL + if (flags & LIR_OpArrayCopy::src_null_check) { + __ beqz(src, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ beqz(dst, *stub->entry()); + } + + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. 
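+  // An instance klass has a non-negative layout helper (>= Klass::_lh_neutral_value),
+  // while array klasses encode a negative one, so the checks below take the slow
+  // path whenever an operand's layout helper is not negative.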
+ if (flags & LIR_OpArrayCopy::type_check) { + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ li(SCR2, Klass::_lh_neutral_value); + __ bge_far(SCR1, SCR2, *stub->entry(), true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ li(SCR2, Klass::_lh_neutral_value); + __ bge_far(SCR1, SCR2, *stub->entry(), true); + } + } + + // check if negative + if (flags & LIR_OpArrayCopy::src_pos_positive_check) { + __ blt_far(src_pos, R0, *stub->entry(), true); + } + if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { + __ blt_far(dst_pos, R0, *stub->entry(), true); + } + + if (flags & LIR_OpArrayCopy::length_positive_check) { + __ blt_far(length, R0, *stub->entry(), true); + } + + if (flags & LIR_OpArrayCopy::src_range_check) { + __ add_w(tmp, src_pos, length); + __ ld_wu(SCR1, src_length_addr); + __ blt_far(SCR1, tmp, *stub->entry(), false); + } + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ add_w(tmp, dst_pos, length); + __ ld_wu(SCR1, dst_length_addr); + __ blt_far(SCR1, tmp, *stub->entry(), false); + } + + if (flags & LIR_OpArrayCopy::type_check) { + // We don't know the array types are compatible + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays + if (UseCompressedClassPointers) { + __ ld_wu(tmp, src_klass_addr); + __ ld_wu(SCR1, dst_klass_addr); + } else { + __ ld_ptr(tmp, src_klass_addr); + __ ld_ptr(SCR1, dst_klass_addr); + } + __ bne_far(tmp, SCR1, *stub->entry()); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + Label cont, slow; + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(dst, Address(SP, 0 * wordSize)); + __ st_ptr(src, Address(SP, 1 * wordSize)); + + __ load_klass(src, src); + __ load_klass(dst, dst); + + __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(dst, Address(SP, 0 * wordSize)); + __ st_ptr(src, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(dst, Address(SP, 0 * wordSize)); + __ ld_ptr(src, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + + __ bnez(dst, cont); + + __ bind(slow); + __ ld_ptr(dst, Address(SP, 0 * wordSize)); + __ ld_ptr(src, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + if (copyfunc_addr != NULL) { // use stub if available + // src is not a sub class of dst so we have to do a + // per-element check. + + int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Check that at least both of them object arrays. 
+ assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + } + int lh_offset = in_bytes(Klass::layout_helper_offset()); + Address klass_lh_addr(tmp, lh_offset); + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ ld_w(SCR1, klass_lh_addr); + __ li(SCR2, objArray_lh); + __ XOR(SCR1, SCR1, SCR2); + __ bnez(SCR1, *stub->entry()); + } + + // Spill because stubs can use any register they like and it's + // easier to restore just those that we care about. + __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ st_ptr(length, Address(SP, 2 * BytesPerWord)); + __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ st_ptr(src, Address(SP, 4 * BytesPerWord)); + + __ lea(A0, Address(src, src_pos, scale)); + __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A0, dst, dst_pos, length); + __ lea(A1, Address(dst, dst_pos, scale)); + __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A1, dst, length); + __ bstrpick_d(A2, length, 31, 0); + assert_different_registers(A2, dst); + + __ load_klass(A4, dst); + __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); + __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); + __ call(copyfunc_addr, relocInfo::runtime_call_type); + +#ifndef PRODUCT + if (PrintC1Statistics) { + Label failed; + __ bnez(A0, failed); + __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); + __ increment(SCR2, 1); + __ bind(failed); + } +#endif + + __ beqz(A0, *stub->continuation()); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); + __ increment(SCR2, 1); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, A0, SCR1); + + // Restore previously spilled arguments + __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); + __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); + + // return value is -1^K where K is partial copied count + __ nor(SCR1, A0, R0); + __ slli_w(SCR1, SCR1, 0); + // adjust length down and src/end pos up by partial copied count + __ sub_w(length, length, SCR1); + __ add_w(src_pos, src_pos, SCR1); + __ add_w(dst_pos, dst_pos, SCR1); + } + + __ b(*stub->entry()); + + __ bind(cont); + __ ld_ptr(dst, Address(SP, 0 * wordSize)); + __ ld_ptr(src, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + } + } + +#ifdef ASSERT + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the + // primitive case the types must match exactly with src.klass and + // dst.klass each exactly matching the default type. For the + // object array case, if no type check is needed then either the + // dst type is exactly the expected type and the src type is a + // subtype which we can't check or src is the same array as dst + // but not necessarily exactly of type default_type. 
+ Label known_ok, halt; + __ mov_metadata(tmp, default_type->constant_encoding()); + if (UseCompressedClassPointers) { + __ encode_klass_not_null(tmp); + } + + if (basic_type != T_OBJECT) { + + if (UseCompressedClassPointers) { + __ ld_wu(SCR1, dst_klass_addr); + } else { + __ ld_ptr(SCR1, dst_klass_addr); + } + __ bne(tmp, SCR1, halt); + if (UseCompressedClassPointers) { + __ ld_wu(SCR1, src_klass_addr); + } else { + __ ld_ptr(SCR1, src_klass_addr); + } + __ beq(tmp, SCR1, known_ok); + } else { + if (UseCompressedClassPointers) { + __ ld_wu(SCR1, dst_klass_addr); + } else { + __ ld_ptr(SCR1, dst_klass_addr); + } + __ beq(tmp, SCR1, known_ok); + __ beq(src, dst, known_ok); + } + __ bind(halt); + __ stop("incorrect type information in arraycopy"); + __ bind(known_ok); + } +#endif + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); + __ increment(SCR2, 1); + } +#endif + + __ lea(A0, Address(src, src_pos, scale)); + __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A0, dst, dst_pos, length); + __ lea(A1, Address(dst, dst_pos, scale)); + __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A1, length); + __ bstrpick_d(A2, length, 31, 0); + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + + CodeBlob *cb = CodeCache::find_blob(entry); + if (cb) { + __ call(entry, relocInfo::runtime_call_type); + } else { + __ call_VM_leaf(entry, 3); + } + + __ bind(*stub->continuation()); +} + +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); + if (!UseFastLocking) { + __ b(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + Register scratch = noreg; + if (UseBiasedLocking) { + scratch = op->scratch_opr()->as_register(); + } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, + "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible + int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } + // done + } else if (op->code() == lir_unlock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, + "lock_reg must point to the displaced header"); + __ unlock_object(hdr, obj, lock, *op->stub()->entry()); + } else { + Unimplemented(); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + ciMethod* callee = op->profiled_callee(); + int bci = op->profiled_bci(); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + Bytecodes::Code bc = method->java_code_at_bci(bci); + const 
bool callee_is_static = callee->is_loaded() && callee->is_static();
+  // Perform additional virtual call profiling for invokevirtual and
+  // invokeinterface bytecodes
+  if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static && // required for optimized MH invokes
+      C1ProfileVirtualCalls) {
+    assert(op->recv()->is_single_cpu(), "recv must be allocated");
+    Register recv = op->recv()->as_register();
+    assert_different_registers(mdo, recv);
+    assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+    ciKlass* known_klass = op->known_holder();
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+      // We know the type that will be seen at this call site; we can
+      // statically update the MethodData* rather than needing to do
+      // dynamic tests on the receiver type
+
+      // NOTE: we should probably put a lock around this search to
+      // avoid collisions by concurrent compilations
+      ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+      uint i;
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (known_klass->equals(receiver)) {
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ ld_ptr(SCR2, data_addr);
+          __ addi_d(SCR2, SCR2, DataLayout::counter_increment);
+          __ st_ptr(SCR2, data_addr);
+          return;
+        }
+      }
+
+      // Receiver type not found in profile data; select an empty slot
+
+      // Note that this is less efficient than it should be because it
+      // always does a write to the receiver part of the
+      // VirtualCallData rather than just the first time
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (receiver == NULL) {
+          Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+          __ mov_metadata(SCR2, known_klass->constant_encoding());
+          __ lea(SCR1, recv_addr);
+          __ st_ptr(SCR2, SCR1, 0);
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ ld_ptr(SCR2, data_addr);
+          __ addi_d(SCR2, SCR2, DataLayout::counter_increment);
+          __ st_ptr(SCR2, data_addr);
+          return;
+        }
+      }
+    } else {
+      __ load_klass(recv, recv);
+      Label update_done;
+      type_profile_helper(mdo, md, data, recv, &update_done);
+      // Receiver did not match any saved receiver and there is no empty row for it.
+      // Increment total counter to indicate polymorphic case.
+ __ ld_ptr(SCR2, counter_addr); + __ addi_d(SCR2, SCR2, DataLayout::counter_increment); + __ st_ptr(SCR2, counter_addr); + + __ bind(update_done); + } + } else { + // Static call + __ ld_ptr(SCR2, counter_addr); + __ addi_d(SCR2, SCR2, DataLayout::counter_increment); + __ st_ptr(SCR2, counter_addr); + } +} + +void LIR_Assembler::emit_delay(LIR_OpDelay*) { + Unimplemented(); +} + +void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { + __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); +} + +void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { + assert(op->crc()->is_single_cpu(), "crc must be register"); + assert(op->val()->is_single_cpu(), "byte value must be register"); + assert(op->result_opr()->is_single_cpu(), "result must be register"); + Register crc = op->crc()->as_register(); + Register val = op->val()->as_register(); + Register res = op->result_opr()->as_register(); + + assert_different_registers(val, crc, res); + __ li(res, StubRoutines::crc_table_addr()); + __ nor(crc, crc, R0); // ~crc + __ update_byte_crc32(crc, val, res); + __ nor(res, crc, R0); // ~crc +} + +void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { + COMMENT("emit_profile_type {"); + Register obj = op->obj()->as_register(); + Register tmp = op->tmp()->as_pointer_register(); + Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + assert(mdo_addr.base() != SCR1, "wrong register"); + + __ verify_oop(obj); + + if (tmp != obj) { + __ move(tmp, obj); + } + if (do_null) { + __ bnez(tmp, update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ ld_ptr(SCR2, mdo_addr); + __ ori(SCR2, SCR2, TypeEntries::null_seen); + __ st_ptr(SCR2, mdo_addr); + } + if (do_update) { +#ifndef ASSERT + __ b(next); + } +#else + __ b(next); + } + } else { + __ bnez(tmp, update); + __ stop("unexpected null obj"); +#endif + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + Label ok; + __ load_klass(tmp, tmp); + __ mov_metadata(SCR1, exact_klass->constant_encoding()); + __ XOR(SCR1, tmp, SCR1); + __ beqz(SCR1, ok); + __ stop("exact klass and actual klass differ"); + __ bind(ok); + } +#endif + if (!no_conflict) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + } else { + __ load_klass(tmp, tmp); + } + + __ ld_ptr(SCR2, mdo_addr); + __ XOR(tmp, tmp, SCR2); + assert(TypeEntries::type_klass_mask == -4, "must be"); + __ bstrpick_d(SCR1, tmp, 63, 2); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + __ beqz(SCR1, next); + + __ andi(SCR1, tmp, TypeEntries::type_unknown); + __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
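+        // A profiled type entry keeps the klass pointer in the upper bits and the
+        // null_seen/type_unknown flags in the two low bits; bstrpick_d(..., 63, 2)
+        // drops those flag bits so the comparisons in this block only look at the
+        // klass word (TypeEntries::type_klass_mask == -4).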
+ + if (TypeEntries::is_type_none(current_klass)) { + __ beqz(SCR2, none); + __ li(SCR1, (u1)TypeEntries::null_seen); + __ beq(SCR2, SCR1, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + membar_acquire(); + __ ld_ptr(SCR2, mdo_addr); + __ XOR(tmp, tmp, SCR2); + assert(TypeEntries::type_klass_mask == -4, "must be"); + __ bstrpick_d(SCR1, tmp, 63, 2); + __ beqz(SCR1, next); + } + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); + + __ ld_ptr(tmp, mdo_addr); + __ andi(SCR2, tmp, TypeEntries::type_unknown); + __ bnez(SCR2, next); // already unknown. Nothing to do anymore. + } + + // different than before. Cannot keep accurate profile. + __ ld_ptr(SCR2, mdo_addr); + __ ori(SCR2, SCR2, TypeEntries::type_unknown); + __ st_ptr(SCR2, mdo_addr); + + if (TypeEntries::is_type_none(current_klass)) { + __ b(next); + + __ bind(none); + // first time here. Set profile type. + __ st_ptr(tmp, mdo_addr); + } + } else { + // There's a single possible klass at this profile point + assert(exact_klass != NULL, "should be"); + if (TypeEntries::is_type_none(current_klass)) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + __ ld_ptr(SCR2, mdo_addr); + __ XOR(tmp, tmp, SCR2); + assert(TypeEntries::type_klass_mask == -4, "must be"); + __ bstrpick_d(SCR1, tmp, 63, 2); + __ beqz(SCR1, next); +#ifdef ASSERT + { + Label ok; + __ ld_ptr(SCR1, mdo_addr); + __ beqz(SCR1, ok); + __ li(SCR2, (u1)TypeEntries::null_seen); + __ beq(SCR1, SCR2, ok); + // may have been set by another thread + membar_acquire(); + __ mov_metadata(SCR1, exact_klass->constant_encoding()); + __ ld_ptr(SCR2, mdo_addr); + __ XOR(SCR2, SCR1, SCR2); + assert(TypeEntries::type_mask == -2, "must be"); + __ bstrpick_d(SCR2, SCR2, 63, 1); + __ beqz(SCR2, ok); + + __ stop("unexpected profiling mismatch"); + __ bind(ok); + } +#endif + // first time here. Set profile type. + __ st_ptr(tmp, mdo_addr); + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); + + __ ld_ptr(tmp, mdo_addr); + __ andi(SCR1, tmp, TypeEntries::type_unknown); + __ bnez(SCR1, next); // already unknown. Nothing to do anymore. + + __ ori(tmp, tmp, TypeEntries::type_unknown); + __ st_ptr(tmp, mdo_addr); + // FIXME: Write barrier needed here? 
+ } + } + + __ bind(next); + } + COMMENT("} emit_profile_type"); +} + +void LIR_Assembler::align_backward_branch_target() {} + +void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) { + if (left->is_single_cpu()) { + assert(dest->is_single_cpu(), "expect single result reg"); + __ sub_w(dest->as_register(), R0, left->as_register()); + } else if (left->is_double_cpu()) { + assert(dest->is_double_cpu(), "expect double result reg"); + __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); + } else if (left->is_single_fpu()) { + assert(dest->is_single_fpu(), "expect single float result reg"); + __ fneg_s(dest->as_float_reg(), left->as_float_reg()); + } else { + assert(left->is_double_fpu(), "expect double float operand reg"); + assert(dest->is_double_fpu(), "expect double float result reg"); + __ fneg_d(dest->as_double_reg(), left->as_double_reg()); + } +} + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) { + __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); +} + +void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, + LIR_Opr tmp, CodeEmitInfo* info) { + assert(!tmp->is_valid(), "don't need temporary"); + __ call(dest, relocInfo::runtime_call_type); + if (info != NULL) { + add_call_info_here(info); + } +} + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, + CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { + move_op(src, dest, type, lir_patch_none, info, + /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); + } else { + ShouldNotReachHere(); + } +} + +#ifdef ASSERT +// emit run-time assertion +void LIR_Assembler::emit_assert(LIR_OpAssert* op) { + assert(op->code() == lir_assert, "must be"); + Label ok; + + if (op->in_opr1()->is_valid()) { + assert(op->in_opr2()->is_valid(), "both operands must be valid"); + assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); + Register reg1 = as_reg(op->in_opr1()); + Register reg2 = as_reg(op->in_opr2()); + switch (op->condition()) { + case lir_cond_equal: __ beq(reg1, reg2, ok); break; + case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; + case lir_cond_less: __ blt(reg1, reg2, ok); break; + case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; + case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; + case lir_cond_greater: __ blt(reg2, reg1, ok); break; + case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; + case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; + default: ShouldNotReachHere(); + } + } else { + assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); + assert(op->condition() == lir_cond_always, "no other conditions allowed"); + } + if (op->halt()) { + const char* str = __ code_string(op->msg()); + __ stop(str); + } else { + breakpoint(); + } + __ bind(ok); +} +#endif + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +void LIR_Assembler::membar() { + COMMENT("membar"); + __ membar(Assembler::AnyAny); +} + +void LIR_Assembler::membar_acquire() { + __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); +} + +void LIR_Assembler::membar_release() { + __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); +} + +void LIR_Assembler::membar_loadload() { + __ membar(Assembler::LoadLoad); +} + +void LIR_Assembler::membar_storestore() { + __ membar(MacroAssembler::StoreStore); +} + +void LIR_Assembler::membar_loadstore() { + __ 
membar(MacroAssembler::LoadStore); +} + +void LIR_Assembler::membar_storeload() { + __ membar(MacroAssembler::StoreLoad); +} + +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + __ move(result_reg->as_register(), TREG); +} + +void LIR_Assembler::peephole(LIR_List *lir) { +} + +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, + LIR_Opr dest, LIR_Opr tmp_op) { + Address addr = as_Address(src->as_address_ptr()); + BasicType type = src->type(); + Register dst = as_reg(dest); + Register tmp = as_reg(tmp_op); + bool is_oop = is_reference_type(type); + + if (Assembler::is_simm(addr.disp(), 12)) { + __ addi_d(tmp, addr.base(), addr.disp()); + } else { + __ li(tmp, addr.disp()); + __ add_d(tmp, addr.base(), tmp); + } + if (addr.index() != noreg) { + if (addr.scale() > Address::times_1) + __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); + else + __ add_d(tmp, tmp, addr.index()); + } + + switch(type) { + case T_INT: + break; + case T_LONG: + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + // unsigned int + } else { + // long + } + break; + default: + ShouldNotReachHere(); + } + + if (code == lir_xadd) { + Register inc = noreg; + if (data->is_constant()) { + inc = SCR1; + __ li(inc, as_long(data)); + } else { + inc = as_reg(data); + } + switch(type) { + case T_INT: + __ amadd_db_w(dst, inc, tmp); + break; + case T_LONG: + __ amadd_db_d(dst, inc, tmp); + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + __ amadd_db_w(dst, inc, tmp); + __ lu32i_d(dst, 0); + } else { + __ amadd_db_d(dst, inc, tmp); + } + break; + default: + ShouldNotReachHere(); + } + } else if (code == lir_xchg) { + Register obj = as_reg(data); + if (is_oop && UseCompressedOops) { + __ encode_heap_oop(SCR2, obj); + obj = SCR2; + } + switch(type) { + case T_INT: + __ amswap_db_w(dst, obj, tmp); + break; + case T_LONG: + __ amswap_db_d(dst, obj, tmp); + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + __ amswap_db_w(dst, obj, tmp); + __ lu32i_d(dst, 0); + } else { + __ amswap_db_d(dst, obj, tmp); + } + break; + default: + ShouldNotReachHere(); + } + if (is_oop && UseCompressedOops) { + __ decode_heap_oop(dst); + } + } else { + ShouldNotReachHere(); + } +} + +#undef __ diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp new file mode 100644 index 00000000000..7cb15f689f5 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp @@ -0,0 +1,1442 @@ +/* + * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_loongarch.inline.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +// Item will be loaded into a byte register; Intel only +void LIRItem::load_byte_item() { + load_item(); +} + +void LIRItem::load_nonconstant() { + LIR_Opr r = value()->operand(); + if (r->is_constant()) { + _result = r; + } else { + load_item(); + } +} + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + +LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } +LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } +LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } +LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } + +LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case intTag: opr = FrameMap::a0_opr; break; + case objectTag: opr = FrameMap::a0_oop_opr; break; + case longTag: opr = FrameMap::long0_opr; break; + case floatTag: opr = FrameMap::fpu0_float_opr; break; + case doubleTag: opr = FrameMap::fpu0_double_opr; break; + case addressTag: + default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; + } + + assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); + return opr; +} + +LIR_Opr LIRGenerator::rlock_byte(BasicType type) { + LIR_Opr reg = new_register(T_INT); + set_vreg_flag(reg, LIRGenerator::byte_reg); + return reg; +} + +//--------- loading items into registers -------------------------------- + +bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { + if (v->type()->as_IntConstant() != NULL) { + return v->type()->as_IntConstant()->value() == 0L; + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0L; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + +bool LIRGenerator::can_inline_as_constant(Value v) const { + // FIXME: Just a guess + if (v->type()->as_IntConstant() != NULL) { + return Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0L; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + 
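+// The is_simm(..., 12) test above matches the signed 12-bit immediate field of
+// LoongArch addi.w/addi.d/slti-type instructions; wider constants have to be
+// materialized into a register first.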
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } + +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} + +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + assert(base->is_register(), "must be"); + intx large_disp = disp; + + // accumulate fixed displacements + if (index->is_constant()) { + LIR_Const *constant = index->as_constant_ptr(); + if (constant->type() == T_INT) { + large_disp += index->as_jint() << shift; + } else { + assert(constant->type() == T_LONG, "should be"); + jlong c = index->as_jlong() << shift; + if ((jlong)((jint)c) == c) { + large_disp += c; + index = LIR_OprFact::illegalOpr; + } else { + LIR_Opr tmp = new_register(T_LONG); + __ move(index, tmp); + index = tmp; + // apply shift and displacement below + } + } + } + + if (index->is_register()) { + // apply the shift and accumulate the displacement + if (shift > 0) { + LIR_Opr tmp = new_pointer_register(); + __ shift_left(index, shift, tmp); + index = tmp; + } + if (large_disp != 0) { + LIR_Opr tmp = new_pointer_register(); + if (Assembler::is_simm(large_disp, 12)) { + __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); + index = tmp; + } else { + __ move(LIR_OprFact::intptrConst(large_disp), tmp); + __ add(tmp, index, tmp); + index = tmp; + } + large_disp = 0; + } + } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { + // index is illegal so replace it with the displacement loaded into a register + index = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(large_disp), index); + large_disp = 0; + } + + // at this point we either have base + index or base + displacement + if (large_disp == 0 && index->is_register()) { + return new LIR_Address(base, index, type); + } else { + assert(Assembler::is_simm(large_disp, 12), "must be"); + return new LIR_Address(base, large_disp, type); + } +} + +LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type, bool needs_card_mark) { + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); + + LIR_Address* addr; + if (index_opr->is_constant()) { + addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); + } else { + if (offset_in_bytes) { + LIR_Opr tmp = new_pointer_register(); + __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); + array_opr = tmp; + offset_in_bytes = 0; + } + addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); + } + if (needs_card_mark) { + // This store will need a precise card mark, so go ahead and + // compute the full address instead of computing it once for the + // store and again for the card mark. + LIR_Opr tmp = new_pointer_register(); + __ leal(LIR_OprFact::address(addr), tmp); + return new LIR_Address(tmp, type); + } else { + return addr; + } +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + LIR_Opr r; + if (type == T_LONG) { + r = LIR_OprFact::longConst(x); + if (!Assembler::is_simm(x, 12)) { + LIR_Opr tmp = new_register(type); + __ move(r, tmp); + return tmp; + } + } else if (type == T_INT) { + r = LIR_OprFact::intConst(x); + if (!Assembler::is_simm(x, 12)) { + // This is all rather nasty. We don't know whether our constant + // is required for a logical or an arithmetic operation, so we + // don't know what the range of valid values is!!
+ LIR_Opr tmp = new_register(type); + __ move(r, tmp); + return tmp; + } + } else { + ShouldNotReachHere(); + r = NULL; // unreachable + } + return r; +} + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { + LIR_Opr pointer = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(counter), pointer); + LIR_Address* addr = new LIR_Address(pointer, type); + increment_counter(addr, step); +} + +void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + LIR_Opr imm = NULL; + switch(addr->type()) { + case T_INT: + imm = LIR_OprFact::intConst(step); + break; + case T_LONG: + imm = LIR_OprFact::longConst(step); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr reg = new_register(addr->type()); + __ load(addr, reg); + __ add(reg, imm, reg); + __ store(reg, addr); +} + +template <typename T> +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, + int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); + __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); +} + +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +template <typename T> +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, + int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); + __ cmp_branch(condition, reg, reg1, type, tgt); +} + +// Explicit instantiation for all supported types.
+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + +bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (is_power_of_2(c - 1)) { + __ shift_left(left, exact_log2(c - 1), tmp); + __ add(tmp, left, result); + return true; + } else if (is_power_of_2(c + 1)) { + __ shift_left(left, exact_log2(c + 1), tmp); + __ sub(tmp, left, result); + return true; + } else { + return false; + } +} + +void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { + BasicType type = item->type(); + __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); +} + +//---------------------------------------------------------------------- +// visitor functions +//---------------------------------------------------------------------- + +void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { + assert(x->is_pinned(),""); + bool needs_range_check = x->compute_needs_range_check(); + bool use_length = x->length() != NULL; + bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT; + bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL || + !get_jobject_constant(x->value())->is_null_object() || + x->should_profile()); + + LIRItem array(x->array(), this); + LIRItem index(x->index(), this); + LIRItem value(x->value(), this); + LIRItem length(this); + + array.load_item(); + index.load_nonconstant(); + + if (use_length && needs_range_check) { + length.set_instruction(x->length()); + length.load_item(); + + } + if (needs_store_check || x->check_boolean()) { + value.load_item(); + } else { + value.load_for_store(x->elt_type()); + } + + set_no_result(x); + + // the CodeEmitInfo must be duplicated for each different + // LIR-instruction because spilling can occur anywhere between two + // instructions and so the debug information must be different + CodeEmitInfo* range_check_info = state_for(x); + CodeEmitInfo* null_check_info = NULL; + if (x->needs_null_check()) { + null_check_info = new CodeEmitInfo(range_check_info); + } + + // emit array address setup early so it schedules better + // FIXME? No harm in this on aarch64, and it might help + LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store); + + if (GenerateRangeChecks && needs_range_check) { + if (use_length) { + __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), x->elt_type(), new RangeCheckStub(range_check_info, index.result())); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // range_check also does the null check + null_check_info = NULL; + } + } + + if (GenerateArrayStoreCheck && needs_store_check) { + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = new_register(objectType); + + CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info); + __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci()); + } + + if (obj_store) { + // Needs GC write barriers. 
+ pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(value.result(), array_addr, null_check_info); + // Seems to be a precise + post_barrier(LIR_OprFact::address(array_addr), value.result()); + } else { + LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info); + __ move(result, array_addr, null_check_info); + } +} + +void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.load_item(); + + set_no_result(x); + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); + // Need a scratch register for biased locking + LIR_Opr scratch = LIR_OprFact::illegalOpr; + if (UseBiasedLocking) { + scratch = new_register(T_INT); + } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { + info_for_exception = state_for(x); + } + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); + monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + +void LIRGenerator::do_MonitorExit(MonitorExit* x) { + assert(x->is_pinned(),""); + + LIRItem obj(x->obj(), this); + obj.dont_load_item(); + + LIR_Opr lock = new_register(T_INT); + LIR_Opr obj_temp = new_register(T_INT); + set_no_result(x); + monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); +} + +void LIRGenerator::do_NegateOp(NegateOp* x) { + LIRItem from(x->x(), this); + from.load_item(); + LIR_Opr result = rlock_result(x); + __ negate (from.result(), result); +} + +// for _fadd, _fmul, _fsub, _fdiv, _frem +// _dadd, _dmul, _dsub, _ddiv, _drem +void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { + // float remainder is implemented as a direct call into the runtime + LIRItem right(x->x(), this); + LIRItem left(x->y(), this); + + BasicTypeList signature(2); + if (x->op() == Bytecodes::_frem) { + signature.append(T_FLOAT); + signature.append(T_FLOAT); + } else { + signature.append(T_DOUBLE); + signature.append(T_DOUBLE); + } + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + const LIR_Opr result_reg = result_register_for(x->type()); + left.load_item_force(cc->at(1)); + right.load_item(); + + __ move(right.result(), cc->at(0)); + + address entry; + if (x->op() == Bytecodes::_frem) { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + } else { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + } + + LIR_Opr result = rlock_result(x); + __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + return; + } + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + + // Always load right hand side. 
+ right.load_item(); + + if (!left.is_register()) + left.load_item(); + + LIR_Opr reg = rlock(x); + + arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); + + set_result(x, round_item(reg)); +} + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + // missing test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + left.load_item(); + bool need_zero_check = true; + if (right.is_constant()) { + jlong c = right.get_jlong_constant(); + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) need_zero_check = false; + // do not load right if the divisor is a power-of-2 constant + if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { + right.dont_load_item(); + } else { + right.load_item(); + } + } else { + right.load_item(); + } + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new DivByZeroStub(info); + __ cmp_branch(lir_cond_equal, right.result(), LIR_OprFact::longConst(0), T_LONG, stub); + } + + rlock_result(x); + switch (x->op()) { + case Bytecodes::_lrem: + __ rem (left.result(), right.result(), x->operand()); + break; + case Bytecodes::_ldiv: + __ div (left.result(), right.result(), x->operand()); + break; + default: + ShouldNotReachHere(); + break; + } + } else { + assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, + "expect lmul, ladd or lsub"); + // add, sub, mul + left.load_item(); + if (!right.is_register()) { + if (x->op() == Bytecodes::_lmul || !right.is_constant() || + (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || + (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { + right.load_item(); + } else { // add, sub + assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); + // don't load constants to save register + right.load_nonconstant(); + } + } + rlock_result(x); + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + } +} + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + // Test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + if (x->is_commutative() && left.is_stack() && right.is_register()) { + // swap them if left is real stack (or cached) and right is real register(not cached) + left_arg = &right; + right_arg = &left; + } + + left_arg->load_item(); + + // do not need to load right, as we can handle stack and constants + if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { + rlock_result(x); + bool need_zero_check = true; + if (right.is_constant()) { + jint c = right.get_jint_constant(); + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) need_zero_check = false; + // do not load right if the divisor is a power-of-2 constant + if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { + right_arg->dont_load_item(); + } else { + right_arg->load_item(); + } + } else { + right_arg->load_item(); + } + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new DivByZeroStub(info); + __ cmp_branch(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0), T_INT, 
stub); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() && + ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || + (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { + right.load_nonconstant(); + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); + } else { + assert (x->op() == Bytecodes::_imul, "expect imul"); + if (right.is_constant()) { + jint c = right.get_jint_constant(); + if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { + right_arg->dont_load_item(); + } else { + // Cannot use constant op. + right_arg->load_item(); + } + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); + } +} + +void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { + // when an operand with use count 1 is the left operand, then it is + // likely that no move for 2-operand-LIR-form is necessary + if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { + x->swap_operands(); + } + + ValueTag tag = x->type()->tag(); + assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); + switch (tag) { + case floatTag: + case doubleTag: do_ArithmeticOp_FPU(x); return; + case longTag: do_ArithmeticOp_Long(x); return; + case intTag: do_ArithmeticOp_Int(x); return; + default: ShouldNotReachHere(); return; + } +} + +// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr +void LIRGenerator::do_ShiftOp(ShiftOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant()) { + right.dont_load_item(); + int c; + switch (x->op()) { + case Bytecodes::_ishl: + c = right.get_jint_constant() & 0x1f; + __ shift_left(left.result(), c, x->operand()); + break; + case Bytecodes::_ishr: + c = right.get_jint_constant() & 0x1f; + __ shift_right(left.result(), c, x->operand()); + break; + case Bytecodes::_iushr: + c = right.get_jint_constant() & 0x1f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + case Bytecodes::_lshl: + c = right.get_jint_constant() & 0x3f; + __ shift_left(left.result(), c, x->operand()); + break; + case Bytecodes::_lshr: + c = right.get_jint_constant() & 0x3f; + __ shift_right(left.result(), c, x->operand()); + break; + case Bytecodes::_lushr: + c = right.get_jint_constant() & 0x3f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + default: + ShouldNotReachHere(); + } + } else { + right.load_item(); + LIR_Opr tmp = new_register(T_INT); + switch (x->op()) { + case Bytecodes::_ishl: + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ shift_left(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_ishr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ shift_right(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_iushr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ 
unsigned_shift_right(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_lshl: + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ shift_left(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_lshr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ shift_right(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_lushr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); + break; + default: + ShouldNotReachHere(); + } + } +} + +// _iand, _land, _ior, _lor, _ixor, _lxor +void LIRGenerator::do_LogicOp(LogicOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant() + && ((right.type()->tag() == intTag + && Assembler::is_uimm(right.get_jint_constant(), 12)) + || (right.type()->tag() == longTag + && Assembler::is_uimm(right.get_jlong_constant(), 12)))) { + right.dont_load_item(); + } else { + right.load_item(); + } + switch (x->op()) { + case Bytecodes::_iand: + case Bytecodes::_land: + __ logical_and(left.result(), right.result(), x->operand()); break; + case Bytecodes::_ior: + case Bytecodes::_lor: + __ logical_or (left.result(), right.result(), x->operand()); break; + case Bytecodes::_ixor: + case Bytecodes::_lxor: + __ logical_xor(left.result(), right.result(), x->operand()); break; + default: Unimplemented(); + } +} + +// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg +void LIRGenerator::do_CompareOp(CompareOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + ValueTag tag = x->x()->type()->tag(); + if (tag == longTag) { + left.set_destroys_register(); + } + left.load_item(); + right.load_item(); + LIR_Opr reg = rlock_result(x); + + if (x->x()->type()->is_float_kind()) { + Bytecodes::Code code = x->op(); + __ fcmp2int(left.result(), right.result(), reg, + (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); + } else if (x->x()->type()->tag() == longTag) { + __ lcmp2int(left.result(), right.result(), reg); + } else { + Unimplemented(); + } +} + +void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); + + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); + + CallingConvention* cc = NULL; + + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); + + value1.set_destroys_register(); + + BasicTypeList signature(2); + signature.append(T_DOUBLE); + signature.append(T_DOUBLE); + cc = frame_map()->c_calling_convention(&signature); + value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); + } else { + BasicTypeList signature(1); + signature.append(T_DOUBLE); + cc = frame_map()->c_calling_convention(&signature); + value.load_item_force(cc->at(0)); + } + + switch (x->id()) { + case vmIntrinsics::_dexp: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); + break; + case vmIntrinsics::_dlog: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); + break; + case vmIntrinsics::_dlog10: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); + break; + case vmIntrinsics::_dpow: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); + break; + case 
vmIntrinsics::_dsin: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); + break; + case vmIntrinsics::_dcos: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); + break; + case vmIntrinsics::_dtan: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); + break; + default: ShouldNotReachHere(); + } + __ move(result_reg, calc_result); +} + +void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { + assert(x->number_of_arguments() == 4, "wrong type"); + LIRItem obj (x->argument_at(0), this); // object + LIRItem offset(x->argument_at(1), this); // offset of field + LIRItem cmp (x->argument_at(2), this); // value to compare with field + LIRItem val (x->argument_at(3), this); // replace field with val if matches cmp + + assert(obj.type()->tag() == objectTag, "invalid type"); + + // In 64bit the type can be long, sparc doesn't have this assert + // assert(offset.type()->tag() == intTag, "invalid type"); + + assert(cmp.type()->tag() == type->tag(), "invalid type"); + assert(val.type()->tag() == type->tag(), "invalid type"); + + // get address of field + obj.load_item(); + offset.load_nonconstant(); + val.load_item(); + cmp.load_item(); + + LIR_Address* a; + if(offset.result()->is_constant()) { + jlong c = offset.result()->as_jlong(); + if ((jlong)((jint)c) == c) { + a = new LIR_Address(obj.result(), + (jint)c, + as_BasicType(type)); + } else { + LIR_Opr tmp = new_register(T_LONG); + __ move(offset.result(), tmp); + a = new LIR_Address(obj.result(), + tmp, + as_BasicType(type)); + } + } else { + a = new LIR_Address(obj.result(), + offset.result(), + LIR_Address::times_1, + 0, + as_BasicType(type)); + } + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + if (type == objectType) { // Write-barrier needed for Object fields. + // Do the pre-write barrier, if any. + pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + + LIR_Opr result = rlock_result(x); + + LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience + if (type == objectType) + __ cas_obj(addr, cmp.result(), val.result(), new_register(T_INT), new_register(T_INT), + result); + else if (type == intType) + __ cas_int(addr, cmp.result(), val.result(), ill, ill); + else if (type == longType) + __ cas_long(addr, cmp.result(), val.result(), ill, ill); + else { + ShouldNotReachHere(); + } + + __ move(FrameMap::scr1_opr, result); + + if (type == objectType) { // Write-barrier needed for Object fields. 
+ // Seems to be precise + post_barrier(addr, val.result()); + } +} + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), + "wrong type"); + if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || + x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || + x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || + x->id() == vmIntrinsics::_dlog10) { + do_LibmIntrinsic(x); + return; + } + switch (x->id()) { + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_dsqrt: + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + case vmIntrinsics::_dabs: + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + default: + ShouldNotReachHere(); + } + break; + } + default: + ShouldNotReachHere(); + } +} + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + Register j_rarg0 = RT0; + Register j_rarg1 = RA0; + Register j_rarg2 = RA1; + Register j_rarg3 = RA2; + Register j_rarg4 = RA3; + Register j_rarg5 = RA4; + + assert(x->number_of_arguments() == 5, "wrong type"); + + // Make all state_for calls early since they can emit code + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem src(x->argument_at(0), this); + LIRItem src_pos(x->argument_at(1), this); + LIRItem dst(x->argument_at(2), this); + LIRItem dst_pos(x->argument_at(3), this); + LIRItem length(x->argument_at(4), this); + + // operands for arraycopy must use fixed registers, otherwise + // LinearScan will fail allocation (because arraycopy always needs a + // call) + + // The java calling convention will give us enough registers + // so that on the stub side the args will be perfect already. + // On the other slow/special case side we call C and the arg + // positions are not similar enough to pick one as the best. 
+ // Also because the java calling convention is a "shifted" version + // of the C convention we can process the java args trivially into C + // args without worry of overwriting during the xfer + + src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); + src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); + dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); + dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); + length.load_item_force (FrameMap::as_opr(j_rarg4)); + + LIR_Opr tmp = FrameMap::as_opr(j_rarg5); + + set_no_result(x); + + int flags; + ciArrayKlass* expected_type; + arraycopy_helper(x, &flags, &expected_type); + + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), + length.result(), tmp, expected_type, flags, info); // does add_safepoint +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + assert(UseCRC32Intrinsics, "why are we here?"); + // Make all state_for calls early since they can emit code + LIR_Opr result = rlock_result(x); + int flags = 0; + switch (x->id()) { + case vmIntrinsics::_updateCRC32: { + LIRItem crc(x->argument_at(0), this); + LIRItem val(x->argument_at(1), this); + // val is destroyed by update_crc32 + val.set_destroys_register(); + crc.load_item(); + val.load_item(); + __ update_crc32(crc.result(), val.result(), result); + break; + } + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: { + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem len(x->argument_at(3), this); + buf.load_item(); + off.load_nonconstant(); + + LIR_Opr index = off.result(); + int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + offset += off.result()->as_jint(); + } + LIR_Opr base_op = buf.result(); + + if (index->is_valid()) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + index = tmp; + } + + if (offset) { + LIR_Opr tmp = new_pointer_register(); + __ add(base_op, LIR_OprFact::intConst(offset), tmp); + base_op = tmp; + offset = 0; + } + + LIR_Address* a = new LIR_Address(base_op, index, LIR_Address::times_1, offset, T_BYTE); + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + len.load_item_force(cc->at(2)); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } +} + +// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f +// _i2b, _i2c, _i2s +void LIRGenerator::do_Convert(Convert* x) { + LIRItem value(x->value(), this); + value.load_item(); + LIR_Opr input = value.result(); + LIR_Opr result = rlock(x); + + // arguments of lir_convert + LIR_Opr conv_input = input; + LIR_Opr conv_result = result; + + switch (x->op()) { + case Bytecodes::_f2i: + case Bytecodes::_f2l: + __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); + break; + case Bytecodes::_d2i: + case Bytecodes::_d2l: + __ convert(x->op(), conv_input, conv_result, NULL, 
new_register(T_DOUBLE)); + break; + default: + __ convert(x->op(), conv_input, conv_result); + break; + } + + assert(result->is_virtual(), "result must be virtual register"); + set_result(x, result); +} + +void LIRGenerator::do_NewInstance(NewInstance* x) { +#ifndef PRODUCT + if (PrintNotLoaded && !x->klass()->is_loaded()) { + tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); + } +#endif + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::t0_oop_opr, + FrameMap::t1_oop_opr, + FrameMap::a4_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::a3_metadata_opr, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem length(x->length(), this); + length.load_item_force(FrameMap::s0_opr); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::t0_oop_opr; + LIR_Opr tmp2 = FrameMap::t1_oop_opr; + LIR_Opr tmp3 = FrameMap::a5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::a3_metadata_opr; + LIR_Opr len = length.result(); + BasicType elem_type = x->elt_type(); + + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + LIRItem length(x->length(), this); + // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction + // and therefore provide the state before the parameters have been consumed + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + CodeEmitInfo* info = state_for(x, x->state()); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::t0_oop_opr; + LIR_Opr tmp2 = FrameMap::t1_oop_opr; + LIR_Opr tmp3 = FrameMap::a5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::a3_metadata_opr; + + length.load_item_force(FrameMap::s0_opr); + LIR_Opr len = length.result(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(i, NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Evaluate state_for early since it may emit code. 
+ CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). + x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + CodeEmitInfo* info = state_for(x, x->state()); + + i = dims->length(); + while (i-- > 0) { + LIRItem* size = items->at(i); + size->load_item(); + + store_stack_parameter(size->result(), in_ByteSize(i*4)); + } + + LIR_Opr klass_reg = FrameMap::a0_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + + LIR_Opr rank = FrameMap::s0_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::a2_opr; + __ move(FrameMap::sp_opr, varargs); + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + LIR_Opr reg = result_register_for(x->type()); + __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, + reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { + // nothing to do for now +} + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || + (PatchALot && !x->is_incompatible_class_change_check() && + !x->is_invokespecial_receiver_check())) { + // must do this before locking the destination register as an oop register, + // and before the obj is loaded (the latter is for deoptimization) + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + + // info for exceptions + CodeEmitInfo* info_for_exception = + (x->needs_exception_state() ? 
state_for(x) : + state_for(x, x->state_before(), true /*ignore_xhandler*/)); + + CodeStub* stub; + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, + LIR_OprFact::illegalOpr, info_for_exception); + } else if (x->is_invokespecial_receiver_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new DeoptimizeStub(info_for_exception); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, + obj.result(), info_for_exception); + } + LIR_Opr reg = rlock_result(x); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ checkcast(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + + // result and test object may not be in same register + LIR_Opr reg = rlock_result(x); + CodeEmitInfo* patching_info = NULL; + if ((!x->klass()->is_loaded() || PatchALot)) { + // must do this before locking the destination register as an oop register + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ instanceof(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_If(If* x) { + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + bool is_safepoint = x->is_safepoint(); + + If::Condition cond = x->cond(); + + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + + if (tag == longTag) { + // for longs, only conditions "eql", "neq", "lss", "geq" are valid; + // mirror for other conditions + if (cond == If::gtr || cond == If::leq) { + cond = Instruction::mirror(cond); + xin = &yitem; + yin = &xitem; + } + xin->set_destroys_register(); + } + xin->load_item(); + + if (tag == longTag) { + if (yin->is_constant() && yin->get_jlong_constant() == 0) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + } else if (tag == intTag) { + if (yin->is_constant() && yin->get_jint_constant() == 0) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + } else { + yin->load_item(); + } + + set_no_result(x); + + LIR_Opr left = xin->result(); + LIR_Opr right = yin->result(); + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + // increment backedge counter if needed + increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci()); + __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); + } + + // Generate branch profiling. Profiling code doesn't kill flags. 
+ profile_branch(x, cond, left, right); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { + __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux(), x->usux()); + } else { + __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::as_pointer_opr(TREG); +} + +void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { + __ volatile_store_mem_reg(value, address, info); +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { + // 8179954: We need to make sure that the code generated for + // volatile accesses forms a sequentially-consistent set of + // operations when combined with STLR and LDAR. Without a leading + // membar it's possible for a simple Dekker test to fail if loads + // use LD;DMB but stores use STLR. This can happen if C2 compiles + // the stores in one method and C1 compiles the loads in another. + __ membar(); + __ volatile_load_mem_reg(address, result, info); +} + +void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset, + BasicType type, bool is_volatile) { + LIR_Address* addr = new LIR_Address(src, offset, type); + __ load(addr, dst); +} + +void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, + BasicType type, bool is_volatile) { + LIR_Address* addr = new LIR_Address(src, offset, type); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + if (is_obj) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(data, addr); + assert(src->is_register(), "must be register"); + // Seems to be a precise address + post_barrier(LIR_OprFact::address(addr), data); + } else { + __ move(data, addr); + } +} + +void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { + BasicType type = x->basic_type(); + LIRItem src(x->object(), this); + LIRItem off(x->offset(), this); + LIRItem value(x->value(), this); + + src.load_item(); + off.load_nonconstant(); + + // We can cope with a constant increment in an xadd + if (! (x->is_add() + && value.is_constant() + && can_inline_as_constant(x->value()))) { + value.load_item(); + } + + LIR_Opr dst = rlock_result(x, type); + LIR_Opr data = value.result(); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + LIR_Opr offset = off.result(); + + if (data == dst) { + LIR_Opr tmp = new_register(data->type()); + __ move(data, tmp); + data = tmp; + } + + LIR_Address* addr; + if (offset->is_constant()) { + jlong l = offset->as_jlong(); + assert((jlong)((jint)l) == l, "offset too large for constant"); + jint c = (jint)l; + addr = new LIR_Address(src.result(), c, type); + } else { + addr = new LIR_Address(src.result(), offset, type); + } + + LIR_Opr tmp = new_register(T_INT); + LIR_Opr ptr = LIR_OprFact::illegalOpr; + + if (x->is_add()) { + __ xadd(LIR_OprFact::address(addr), data, dst, tmp); + } else { + if (is_obj) { + // Do the pre-write barrier, if any. 
+ ptr = new_pointer_register(); + __ add(src.result(), off.result(), ptr); + pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + __ xchg(LIR_OprFact::address(addr), data, dst, tmp); + if (is_obj) { + post_barrier(ptr, data); + } + } +} diff --git a/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp new file mode 100644 index 00000000000..f15dacafeba --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP + +inline bool LinearScan::is_processed_reg_num(int reg_num) { + return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; +} + +inline int LinearScan::num_physical_regs(BasicType type) { + return 1; +} + +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} + +inline bool LinearScan::is_caller_save(int assigned_reg) { + assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); + if (assigned_reg < pd_first_callee_saved_reg) + return true; + if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) + return true; + if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) + return true; + return false; +} + +inline void LinearScan::pd_add_temps(LIR_Op* op) {} + +// Implementation of LinearScanWalker +inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { + if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { + assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + return true; + } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || + cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; + } + return false; +} + +#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp new file mode 100644 index 00000000000..219b2e3671c --- /dev/null +++ 
b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LinearScan.hpp" +#include "utilities/bitMap.inline.hpp" + +void LinearScan::allocate_fpu_stack() { + // No FPU stack on LoongArch64 +} diff --git a/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp new file mode 100644 index 00000000000..38ff4c58369 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP + +using MacroAssembler::build_frame; +using MacroAssembler::null_check; + +// C1_MacroAssembler contains high-level macros for C1 + + private: + int _rsp_offset; // track rsp changes + // initialization + void pd_init() { _rsp_offset = 0; } + + public: + void try_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); + + // locking + // hdr : must be A0, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved + // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information + int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed + void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); + + void initialize_object( + Register obj, // result: pointer to object after successful allocation + Register klass, // object klass + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB + ); + + // allocation of fixed-size objects + // (can also be used to allocate fixed-size arrays, by setting + // hdr_size correctly and storing the array length afterwards) + // obj : will contain pointer to allocated object + // t1, t2 : scratch registers - contents destroyed + // header_size: size of object header in words + // object_size: total size of object in words + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_object(Register obj, Register t1, Register t2, int header_size, + int object_size, Register klass, Label& slow_case); + + enum { + max_array_allocation_length = 0x00FFFFFF + }; + + // allocation of arrays + // obj : will contain pointer to allocated object + // len : array length in number of elements + // t : scratch register - contents destroyed + // header_size: size of object header in words + // f : element scale factor + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, + int f, Register klass, Label& slow_case); + + int rsp_offset() const { return _rsp_offset; } + void set_rsp_offset(int n) { _rsp_offset = n; } + + void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, + bool inv_a4, bool inv_a5) PRODUCT_RETURN; + + // This platform only uses 
signal-based null checks. The Label is not needed. + void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } + + void load_parameter(int offset_in_words, Register reg); + +#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp new file mode 100644 index 00000000000..b75126fba44 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp @@ -0,0 +1,346 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T4 RT4 + +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + int null_check_offset = -1; + Label done; + + verify_oop(obj); + + // save object being locked into the BasicObjectLock + st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + assert(scratch != noreg, "should have scratch register at this point"); + null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); + } else { + null_check_offset = offset(); + } + + // Load object header + ld_ptr(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + ori(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + st_ptr(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + lea(SCR2, Address(obj, hdr_offset)); + cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub_d(hdr, hdr, SP); + li(SCR1, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, SCR1); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + st_ptr(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case); + // done + bind(done); + return null_check_offset; +} + +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + + if (UseBiasedLocking) { + // load object + ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + biased_locking_exit(obj, hdr, done); + } + + // load displaced header + ld_ptr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); + if (!UseBiasedLocking) { + // load object + ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + lea(SCR1, Address(obj, hdr_offset)); + cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); + } else { + cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); + } + // done + bind(done); +} + +// Defines obj, preserves var_size_in_bytes +void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, + int con_size_in_bytes, Register t1, Register t2, + Label& slow_case) { + if (UseTLAB) { + tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); + } else { + eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); + } +} + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, + Register t1, Register t2) { + assert_different_registers(obj, klass, len); + if (UseBiasedLocking && !len->is_valid()) { + assert_different_registers(obj, klass, len, t1, t2); + ld_ptr(t1, Address(klass, Klass::prototype_header_offset())); + } else { + // This assumes that all prototype bits fit in an int32_t + li(t1, (int32_t)(intptr_t)markOopDesc::prototype()); + } + st_ptr(t1, Address(obj, 
oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass + encode_klass_not_null(t1, klass); + st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); + } else { + st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + } + + if (len->is_valid()) { + st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); + } else if (UseCompressedClassPointers) { + store_klass_gap(obj, R0); + } +} + +// preserves obj, destroys len_in_bytes +// +// Scratch registers: t1 = T0, t2 = T1 +// +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, + int hdr_size_in_bytes, Register t1, Register t2) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + assert(t1 == T0 && t2 == T1, "must be"); + Label done; + + // len_in_bytes is positive and ptr sized + addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); + beqz(len_in_bytes, done); + + // zero_words() takes ptr in t1 and count in bytes in t2 + lea(t1, Address(obj, hdr_size_in_bytes)); + addi_d(t2, len_in_bytes, -BytesPerWord); + + Label loop; + bind(loop); + stx_d(R0, t1, t2); + addi_d(t2, t2, -BytesPerWord); + bge(t2, R0, loop); + + bind(done); +} + +void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, + int object_size, Register klass, Label& slow_case) { + assert_different_registers(obj, t1, t2); + assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); + + try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); + + initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); +} + +// Scratch registers: t1 = T0, t2 = T1 +void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, + int con_size_in_bytes, Register t1, Register t2, + bool is_tlab_allocated) { + assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, + "con_size_in_bytes is not multiple of alignment"); + const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; + + initialize_header(obj, klass, noreg, t1, t2); + + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { + // clear rest of allocated space + const Register index = t2; + if (var_size_in_bytes != noreg) { + move(index, var_size_in_bytes); + initialize_body(obj, index, hdr_size_in_bytes, t1, t2); + } else if (con_size_in_bytes > hdr_size_in_bytes) { + con_size_in_bytes -= hdr_size_in_bytes; + lea(t1, Address(obj, hdr_size_in_bytes)); + Label loop; + li(SCR1, con_size_in_bytes - BytesPerWord); + bind(loop); + stx_d(R0, t1, SCR1); + addi_d(SCR1, SCR1, -BytesPerWord); + bge(SCR1, R0, loop); + } + } + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == A0, "must be"); + call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, + int header_size, int f, Register klass, Label& slow_case) { + assert_different_registers(obj, len, t1, t2, klass); + + // determine alignment mask + assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); + + // check for negative or excessive length + li(SCR1, (int32_t)max_array_allocation_length); + bge_far(len, SCR1, slow_case, false); + + const Register arr_size = t2; // okay to be the same + // align object end + li(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); + slli_w(SCR1, len, f); + 
add_d(arr_size, arr_size, SCR1); + bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); + + try_allocate(obj, arr_size, 0, t1, t2, slow_case); + + initialize_header(obj, klass, len, t1, t2); + + // clear rest of allocated space + initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == A0, "must be"); + call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); +} + +void C1_MacroAssembler::remove_frame(int framesize) { + MacroAssembler::remove_frame(framesize); +} + +void C1_MacroAssembler::verified_entry() { + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a b, bl, nop, break. + // Make it a NOP. + nop(); +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { + // rbp, + 0: link + // + 1: return address + // + 2: argument with offset 0 + // + 3: argument with offset 1 + // + 4: ... + + ld_ptr(reg, Address(FP, (offset_in_words + 2) * BytesPerWord)); +} + +#ifndef PRODUCT +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) return; + verify_oop_addr(Address(SP, stack_offset), "oop"); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + if (!VerifyOops) return; + Label not_null; + bnez(r, not_null); + stop("non-null oop required"); + bind(not_null); + verify_oop(r); +} + +void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, + bool inv_a3, bool inv_a4, bool inv_a5) { +#ifdef ASSERT + static int nn; + if (inv_a0) li(A0, 0xDEAD); + if (inv_s0) li(S0, 0xDEAD); + if (inv_a2) li(A2, nn++); + if (inv_a3) li(A3, 0xDEAD); + if (inv_a4) li(A4, 0xDEAD); + if (inv_a5) li(A5, 0xDEAD); +#endif +} +#endif // ifndef PRODUCT diff --git a/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp new file mode 100644 index 00000000000..a750dca323b --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp @@ -0,0 +1,1252 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Defs.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" +#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/universe.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "register_loongarch.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_loongarch.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#endif + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T5 RT5 +#define T6 RT6 +#define T8 RT8 + +// Implementation of StubAssembler + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { + // setup registers + assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, + "registers must be different"); + assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + + move(A0, TREG); + set_num_rt_args(0); // Nothing on stack + + Label retaddr; + set_last_Java_frame(SP, FP, retaddr); + + // do the call + call(entry, relocInfo::runtime_call_type); + bind(retaddr); + int call_offset = offset(); + // verify callee-saved register +#ifdef ASSERT + { Label L; + get_thread(SCR1); + beq(TREG, SCR1, L); + stop("StubAssembler::call_RT: TREG not callee saved?"); + bind(L); + } +#endif + reset_last_Java_frame(true); + + // check for pending exceptions + { Label L; + // check for pending exceptions (java_thread is set upon return) + ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); + beqz(SCR1, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared + if (oop_result1->is_valid()) { + st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { + st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); + } + if (frame_size() == no_frame_size) { + leave(); + jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + } else if (_stub_id == Runtime1::forward_exception_id) { + should_not_reach_here(); + } else { + jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); + } + bind(L); + } + // get oop results if there are any and reset the values in the thread + if (oop_result1->is_valid()) { + get_vm_result(oop_result1, TREG); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, TREG); + } + return call_offset; +} + +int StubAssembler::call_RT(Register oop_result1, 
Register metadata_result, + address entry, Register arg1) { + move(A1, arg1); + return call_RT(oop_result1, metadata_result, entry, 1); +} + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, + address entry, Register arg1, Register arg2) { + if (A1 == arg2) { + if (A2 == arg1) { + move(SCR1, arg1); + move(arg1, arg2); + move(arg2, SCR1); + } else { + move(A2, arg2); + move(A1, arg1); + } + } else { + move(A1, arg1); + move(A2, arg2); + } + return call_RT(oop_result1, metadata_result, entry, 2); +} + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, + address entry, Register arg1, Register arg2, Register arg3) { + // if there is any conflict use the stack + if (arg1 == A2 || arg1 == A3 || + arg2 == A1 || arg2 == A3 || + arg3 == A1 || arg3 == A2) { + addi_d(SP, SP, -4 * wordSize); + st_ptr(arg1, Address(SP, 0 * wordSize)); + st_ptr(arg2, Address(SP, 1 * wordSize)); + st_ptr(arg3, Address(SP, 2 * wordSize)); + ld_ptr(arg1, Address(SP, 0 * wordSize)); + ld_ptr(arg2, Address(SP, 1 * wordSize)); + ld_ptr(arg3, Address(SP, 2 * wordSize)); + addi_d(SP, SP, 4 * wordSize); + } else { + move(A1, arg1); + move(A2, arg2); + move(A3, arg3); + } + return call_RT(oop_result1, metadata_result, entry, 3); +} + +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; + + public: + StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); +};; + +#define __ _sasm-> + +StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { + _sasm = sasm; + __ set_info(name, must_gc_arguments); + __ enter(); +} + +// load parameters that were stored with LIR_Assembler::store_parameter +// Note: offsets for store_parameter and load_argument must match +void StubFrame::load_argument(int offset_in_words, Register reg) { + __ load_parameter(offset_in_words, reg); +} + +StubFrame::~StubFrame() { + __ leave(); + __ jr(RA); +} + +#undef __ + +// Implementation of Runtime1 + +#define __ sasm-> + +const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; + +// Stack layout for saving/restoring all the registers needed during a runtime +// call (this includes deoptimization) +// Note: note that users of this frame may well have arguments to some runtime +// while these values are on the stack. These positions neglect those arguments +// but the code in save_live_registers will take the argument count into +// account. +// + +enum reg_save_layout { + reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */ +}; + +// Save off registers which might be killed by calls into the runtime. +// Tries to smart of about FP registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. +// In all other cases it should be sufficient to simply save their +// current value. 
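+// Note: the following layout summary is inferred from save_live_registers()
+// and restore_live_registers() below, and is descriptive only. The save area
+// is (32 - 4 + 32) words: words 0..31 hold f0..f31 (when FPU state is saved)
+// and words 32..59 hold the integer registers r4..r31; zr, ra, tp and sp are
+// not saved here. cpu_reg_save_offsets / fpu_reg_save_offsets record the
+// matching positions in VMReg stack-slot units for the oop maps built here.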
+ +static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; +static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; +static int reg_save_size_in_words; +static int frame_size_in_bytes = -1; + +static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { + int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; + sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); + int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + + for (int i = A0->encoding(); i <= T8->encoding(); i++) { + Register r = as_Register(i); + if (i != SCR1->encoding() && i != SCR2->encoding()) { + int sp_offset = cpu_reg_save_offsets[i]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); + } + } + + if (save_fpu_registers) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + FloatRegister r = as_FloatRegister(i); + int sp_offset = fpu_reg_save_offsets[i]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); + } + } + + return oop_map; +} + +static OopMap* save_live_registers(StubAssembler* sasm, + bool save_fpu_registers = true) { + __ block_comment("save_live_registers"); + + // integer registers except zr & ra & tp & sp + __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); + + for (int i = 4; i < 32; i++) + __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); + + if (save_fpu_registers) { + for (int i = 0; i < 32; i++) + __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); + } + + return generate_oop_map(sasm, save_fpu_registers); +} + +static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + for (int i = 0; i < 32; i ++) + __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); + } + + for (int i = 4; i < 32; i++) + __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); + + __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); +} + +static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + for (int i = 0; i < 32; i ++) + __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); + } + + for (int i = 5; i < 32; i++) + __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); + + __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); +} + +void Runtime1::initialize_pd() { + int sp_offset = 0; + int i; + + // all float registers are saved explicitly + assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); + for (i = 0; i < FrameMap::nof_fpu_regs; i++) { + fpu_reg_save_offsets[i] = sp_offset; + sp_offset += 2; // SP offsets are in halfwords + } + + for (i = 4; i < FrameMap::nof_cpu_regs; i++) { + Register r = as_Register(i); + cpu_reg_save_offsets[i] = sp_offset; + sp_offset += 2; // SP offsets are in halfwords + } +} + +// target: the entry point of the method that creates and posts the exception oop +// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) + +OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, + bool has_argument) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + int call_offset; + if (!has_argument) { + call_offset = __ call_RT(noreg, noreg, target); + } else { + __ move(A1, SCR1); + __ move(A2, SCR2); + call_offset = __ call_RT(noreg, noreg, target); + } + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + 
return oop_maps; +} + +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + __ block_comment("generate_handle_exception"); + + // incoming parameters + const Register exception_oop = A0; + const Register exception_pc = A1; + // other registers used in this stub + + // Save registers, if required. + OopMapSet* oop_maps = new OopMapSet(); + OopMap* oop_map = NULL; + switch (id) { + case forward_exception_id: + // We're handling an exception in the context of a compiled frame. + // The registers have been saved in the standard places. Perform + // an exception lookup in the caller and dispatch to the handler + // if found. Otherwise unwind and dispatch to the callers + // exception handler. + oop_map = generate_oop_map(sasm, 1 /*thread*/); + + // load and clear pending exception oop into A0 + __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); + __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); + + // load issuing PC (the return address for this stub) into A1 + __ ld_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); + + // make sure that the vm_results are cleared (may be unnecessary) + __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); + __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); + break; + case handle_exception_nofpu_id: + case handle_exception_id: + // At this point all registers MAY be live. + oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: { + // At this point all registers except exception oop (A0) and + // exception pc (RA) are dead. + const int frame_size = 2 /*fp, return address*/; + oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); + sasm->set_frame_size(frame_size); + break; + } + default: ShouldNotReachHere(); + } + + // verify that only A0 and A1 are valid at this time + __ invalidate_registers(false, true, true, true, true, true); + // verify that A0 contains a valid exception + __ verify_not_null_oop(exception_oop); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are + // empty before writing to them + Label oop_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); + __ beqz(SCR1, oop_empty); + __ stop("exception oop already set"); + __ bind(oop_empty); + + Label pc_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); + __ beqz(SCR1, pc_empty); + __ stop("exception pc already set"); + __ bind(pc_empty); +#endif + + // save exception oop and issuing pc into JavaThread + // (exception handler will load it from here) + __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); + __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); + + // patch throwing pc into return address (has bci & oop map) + __ st_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); + + // compute the exception handler. + // the exception oop and the throwing pc are read from the fields in JavaThread + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); + oop_maps->add_gc_map(call_offset, oop_map); + + // A0: handler address + // will be the deopt blob if nmethod was deoptimized while we looked up + // handler regardless of whether handler existed in the nmethod. 
+ + // only A0 is valid at this time, all other registers have been destroyed by the runtime call + __ invalidate_registers(false, true, true, true, true, true); + + // patch the return address, this stub will directly return to the exception handler + __ st_ptr(A0, Address(FP, 1 * BytesPerWord)); + + switch (id) { + case forward_exception_id: + case handle_exception_nofpu_id: + case handle_exception_id: + // Restore the registers that were saved at the beginning. + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: + break; + default: ShouldNotReachHere(); + } + + return oop_maps; +} + +void Runtime1::generate_unwind_exception(StubAssembler *sasm) { + // incoming parameters + const Register exception_oop = A0; + // callee-saved copy of exception_oop during runtime call + const Register exception_oop_callee_saved = S0; + // other registers used in this stub + const Register exception_pc = A1; + const Register handler_addr = A3; + + // verify that only A0, is valid at this time + __ invalidate_registers(false, true, true, true, true, true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); + __ beqz(SCR1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); + __ beqz(SCR1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Save our return address because + // exception_handler_for_return_address will destroy it. We also + // save exception_oop + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(RA, Address(SP, 0 * wordSize)); + __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); + + // search the exception handler address of the caller (using the return address) + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); + // V0: exception handler address of the caller + + // Only V0 is valid at this time; all other registers have been + // destroyed by the call. + __ invalidate_registers(false, true, true, true, false, true); + + // move result of call into correct register + __ move(handler_addr, A0); + + // get throwing pc (= return address). + // RA has been destroyed by the call + __ ld_ptr(RA, Address(SP, 0 * wordSize)); + __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + __ move(A1, RA); + + __ verify_not_null_oop(exception_oop); + + // continue at exception handler (return address removed) + // note: do *not* remove arguments when unwinding the + // activation since the caller assumes having + // all arguments on the stack when entering the + // runtime to determine the exception handler + // (GC happens at call site with arguments!) + // A0: exception oop + // A1: throwing pc + // A3: exception handler + __ jr(handler_addr); +} + +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + // use the maximum number of runtime-arguments here because it is difficult to + // distinguish each RT-Call. + // Note: This number affects also the RT-Call in generate_handle_exception because + // the oop-map is shared for all calls. 
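+  // Summary of the code below (descriptive only): all live registers are
+  // saved, the patching runtime entry is called with the current thread as
+  // its only argument, and the flag returned in A0 is tested. A non-zero
+  // result means the nmethod was deoptimized while patching, so the registers
+  // are restored, the frame is removed and control continues at the deopt
+  // blob's re-execution entry; otherwise the registers are restored and the
+  // stub simply returns so the patched site is executed normally.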
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + OopMap* oop_map = save_live_registers(sasm); + + __ move(A0, TREG); + Label retaddr; + __ set_last_Java_frame(SP, FP, retaddr); + // do the call + __ call(target, relocInfo::runtime_call_type); + __ bind(retaddr); + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(__ offset(), oop_map); + // verify callee-saved register +#ifdef ASSERT + { Label L; + __ get_thread(SCR1); + __ beq(TREG, SCR1, L); + __ stop("StubAssembler::call_RT: rthread not callee saved?"); + __ bind(L); + } +#endif + + __ reset_last_Java_frame(true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); + __ beqz(SCR1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); + __ beqz(SCR1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Runtime will return true if the nmethod has been deoptimized, this is the + // expected scenario and anything else is an error. Note that we maintain a + // check on the result purely as a defensive measure. + Label no_deopt; + __ beqz(A0, no_deopt); // Have we deoptimized? + + // Perform a re-execute. The proper return address is already on the stack, + // we just need to restore registers, pop all of our frame but the return + // address and jump to the deopt blob. + restore_live_registers(sasm); + __ leave(); + __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); + + __ bind(no_deopt); + restore_live_registers(sasm); + __ leave(); + __ jr(RA); + + return oop_maps; +} + +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + // for better readability + const bool must_gc_arguments = true; + const bool dont_gc_arguments = false; + + // default value; overwritten for some optimized stubs that are called + // from methods that do not use the fpu + bool save_fpu_registers = true; + + // stub code & info for the different stubs + OopMapSet* oop_maps = NULL; + OopMap* oop_map = NULL; + switch (id) { + { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + __ leave(); + __ jr(RA); + } + break; + + case throw_div0_exception_id: + { + StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: + { + StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + Register klass = A3; // Incoming + Register obj = A0; // Result + + if (id == new_instance_id) { + __ set_info("new_instance", dont_gc_arguments); + } else if (id == fast_new_instance_id) { + __ set_info("fast new_instance", dont_gc_arguments); + } else { + assert(id == fast_new_instance_init_check_id, "bad StubID"); + __ set_info("fast new_instance init check", dont_gc_arguments); + } + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. 
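+        // Summary of the inline fast path below (descriptive only): it is
+        // taken only for the fast_new_instance variants when TLABs are off
+        // and the heap supports inline contiguous allocation. The klass init
+        // state is checked first (init-check variant only), the instance size
+        // is read from Klass::layout_helper, eden_allocate() reserves the
+        // space and initialize_object() fills in header and body; any failure
+        // branches to slow_path, which falls through to the call_RT
+        // new_instance slow case that follows.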
+ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && + !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Label slow_path; + Register obj_size = S0; + Register t1 = T0; + Register t2 = T1; + assert_different_registers(klass, obj, obj_size, t1, t2); + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(S0, Address(SP, 0)); + + if (id == fast_new_instance_init_check_id) { + // make sure the klass is initialized + __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); + __ li(SCR2, InstanceKlass::fully_initialized); + __ bne_far(SCR1, SCR2, slow_path); + } + +#ifdef ASSERT + // assert object can be fast path allocated + { + Label ok, not_ok; + __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); + __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) + __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); + __ beqz(SCR1, ok); + __ bind(not_ok); + __ stop("assert(can be fast path allocated)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // get the instance size (size is postive so movl is fine for 64bit) + __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); + + __ eden_allocate(obj, obj_size, 0, t1, slow_path); + + __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); + __ verify_oop(obj); + __ ld_ptr(S0, Address(SP, 0)); + __ addi_d(SP, SP, 2 * wordSize); + __ jr(RA); + + __ bind(slow_path); + __ ld_ptr(S0, Address(SP, 0)); + __ addi_d(SP, SP, 2 * wordSize); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_a0(sasm); + __ verify_oop(obj); + __ leave(); + __ jr(RA); + + // A0,: new instance + } + + break; + + case counter_overflow_id: + { + Register bci = A0, method = A1; + __ enter(); + OopMap* map = save_live_registers(sasm); + // Retrieve bci + __ ld_w(bci, Address(FP, 2 * BytesPerWord)); + // And a pointer to the Method* + __ ld_d(method, Address(FP, 3 * BytesPerWord)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + __ jr(RA); + } + break; + + case new_type_array_id: + case new_object_array_id: + { + Register length = S0; // Incoming + Register klass = A3; // Incoming + Register obj = A0; // Result + + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + +#ifdef ASSERT + // assert object type is really an array of the proper kind + { + Label ok; + Register t0 = obj; + __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); + __ srai_w(t0, t0, Klass::_lh_array_tag_shift); + int tag = ((id == new_type_array_id) + ? Klass::_lh_array_tag_type_value + : Klass::_lh_array_tag_obj_value); + __ li(SCR1, tag); + __ beq(t0, SCR1, ok); + __ stop("assert(is an array klass)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. 
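+        // Summary of the inline fast path below (descriptive only): the
+        // length is checked against max_array_allocation_length, the
+        // allocation size is computed from Klass::layout_helper as
+        // header_size + (length << element-size shift) rounded up to the
+        // object alignment, eden_allocate() reserves the space and
+        // initialize_header()/initialize_body() fill it in; any failure
+        // branches to slow_path and the call_RT new_type_array /
+        // new_object_array slow case that follows.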
+ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Register arr_size = A5; + Register t1 = T0; + Register t2 = T1; + Label slow_path; + assert_different_registers(length, klass, obj, arr_size, t1, t2); + + // check that array length is small enough for fast path. + __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); + __ blt_far(SCR1, length, slow_path, false); + + // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) + // since size is positive ldrw does right thing on 64bit + __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); + // since size is positive movw does right thing on 64bit + __ move(arr_size, length); + __ sll_w(arr_size, length, t1); + __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + + exact_log2(Klass::_lh_header_size_mask + 1) - 1, + Klass::_lh_header_size_shift); + __ add_d(arr_size, arr_size, t1); + __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up + __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); + + __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size + + __ initialize_header(obj, klass, length, t1, t2); + __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); + __ andi(t1, t1, Klass::_lh_header_size_mask); + __ sub_d(arr_size, arr_size, t1); // body length + __ add_d(t1, t1, obj); // body start + __ initialize_body(t1, arr_size, 0, t1, t2); + __ membar(Assembler::StoreStore); + __ verify_oop(obj); + + __ jr(RA); + + __ bind(slow_path); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset; + if (id == new_type_array_id) { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); + } else { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_a0(sasm); + + __ verify_oop(obj); + __ leave(); + __ jr(RA); + + // A0: new array + } + break; + + case new_multi_array_id: + { + StubFrame f(sasm, "new_multi_array", dont_gc_arguments); + // A0,: klass + // S0,: rank + // A2: address of 1st dimension + OopMap* map = save_live_registers(sasm); + __ move(A1, A0); + __ move(A3, A2); + __ move(A2, S0); + int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_a0(sasm); + + // A0,: new multi array + __ verify_oop(A0); + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // This is called via call_runtime so the arguments + // will be place in C abi locations + + __ verify_oop(A0); + + // load the klass and check the has finalizer flag + Label register_finalizer; + Register t = A5; + __ load_klass(t, A0); + __ ld_w(t, Address(t, Klass::access_flags_offset())); + __ li(SCR1, JVM_ACC_HAS_FINALIZER); + __ andr(SCR1, t, SCR1); + __ bnez(SCR1, register_finalizer); + __ jr(RA); + + __ bind(register_finalizer); + __ enter(); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), A0); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, 
oop_map); + + // Now restore all the live registers + restore_live_registers(sasm); + + __ leave(); + __ jr(RA); + } + break; + + case throw_class_cast_exception_id: + { + StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { + StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + + case slow_subtype_check_id: + { + // Typical calling sequence: + // __ push(klass_RInfo); // object klass or other subclass + // __ push(sup_k_RInfo); // array element klass or other superclass + // __ bl(slow_subtype_check); + // Note that the subclass is pushed first, and is therefore deepest. + enum layout { + a0_off, a0_off_hi, + a2_off, a2_off_hi, + a4_off, a4_off_hi, + a5_off, a5_off_hi, + sup_k_off, sup_k_off_hi, + klass_off, klass_off_hi, + framesize, + result_off = sup_k_off + }; + + __ set_info("slow_subtype_check", dont_gc_arguments); + __ addi_d(SP, SP, -4 * wordSize); + __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); + __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); + __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); + __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); + + // This is called by pushing args and not with C abi + __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass + __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass + + Label miss; + __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); + + // fallthrough on success: + __ li(SCR1, 1); + __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result + __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); + __ addi_d(SP, SP, 4 * wordSize); + __ jr(RA); + + __ bind(miss); + __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result + __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); + __ addi_d(SP, SP, 4 * wordSize); + __ jr(RA); + } + break; + + case monitorenter_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorenter_id: + { + StubFrame f(sasm, "monitorenter", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + f.load_argument(1, A0); // A0,: object + f.load_argument(0, A1); // A1,: lock address + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case monitorexit_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorexit_id: + { + StubFrame f(sasm, "monitorexit", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + 
f.load_argument(0, A0); // A0,: lock address + + // note: really a leaf routine but must setup last java sp + // => use call_RT for now (speed can be improved by + // doing last java sp setup manually) + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case deoptimize_id: + { + StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + f.load_argument(0, A1); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ leave(); + __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); + } + break; + + case throw_range_check_failed_id: + { + StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case unwind_exception_id: + { + __ set_info("unwind_exception", dont_gc_arguments); + // note: no stubframe since we are about to leave the current + // activation and we are calling a leaf VM function only. + generate_unwind_exception(sasm); + } + break; + + case access_field_patching_id: + { + StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { + StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_mirror_patching_id: + { + StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case load_appendix_patching_id: + { + StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; + + case handle_exception_nofpu_id: + case handle_exception_id: + { + StubFrame f(sasm, "handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case handle_exception_from_callee_id: + { + StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case throw_index_exception_id: + { + StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { + StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + +#if INCLUDE_ALL_GCS + + case g1_pre_barrier_slow_id: + { + StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); + // arg0 : previous value of memory + + BarrierSet* bs = Universe::heap()->barrier_set(); + 
if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ li(A0, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); + __ should_not_reach_here(); + break; + } + + const Register pre_val = A0; + const Register thread = TREG; + const Register tmp = SCR2; + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + Label done; + Label runtime; + + // Can we store original value in the thread's buffer? + __ ld_ptr(tmp, queue_index); + __ beqz(tmp, runtime); + + __ addi_d(tmp, tmp, -wordSize); + __ st_ptr(tmp, queue_index); + __ ld_ptr(SCR1, buffer); + __ add_d(tmp, tmp, SCR1); + f.load_argument(0, SCR1); + __ st_ptr(SCR1, Address(tmp, 0)); + __ b(done); + + __ bind(runtime); + __ pushad(); + f.load_argument(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); + __ popad(); + __ bind(done); + } + break; + case g1_post_barrier_slow_id: + { + StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments); + + // arg0: store_address + Address store_addr(FP, 2*BytesPerWord); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. + // Must check to see if card is already dirty + + const Register thread = TREG; + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + const Register card_offset = SCR2; + // RA is free here, so we can use it to hold the byte_map_base. + const Register byte_map_base = RA; + + assert_different_registers(card_offset, byte_map_base, SCR1); + + f.load_argument(0, card_offset); + __ srli_d(card_offset, card_offset, CardTableModRefBS::card_shift); + __ load_byte_map_base(byte_map_base); + __ ldx_bu(SCR1, byte_map_base, card_offset); + __ addi_d(SCR1, SCR1, -(int)G1SATBCardTableModRefBS::g1_young_card_val()); + __ beqz(SCR1, done); + + assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0"); + + __ membar(Assembler::StoreLoad); + __ ldx_bu(SCR1, byte_map_base, card_offset); + __ beqz(SCR1, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
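+        // Summary of the sequence below (descriptive only): the card is
+        // dirtied by storing dirty_card_val() (asserted to be 0 above), the
+        // card offset is turned back into a card address, and that address is
+        // enqueued on the thread's dirty card queue; if the queue index is
+        // already 0 the buffer is full and the runtime g1_wb_post entry is
+        // called instead.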
+ __ stx_b(R0, byte_map_base, card_offset); + + // Convert card offset into an address in card_addr + Register card_addr = card_offset; + __ add_d(card_addr, byte_map_base, card_addr); + + __ ld_ptr(SCR1, queue_index); + __ beqz(SCR1, runtime); + __ addi_d(SCR1, SCR1, -wordSize); + __ st_ptr(SCR1, queue_index); + + // Reuse RA to hold buffer_addr + const Register buffer_addr = RA; + + __ ld_ptr(buffer_addr, buffer); + __ stx_d(card_addr, buffer_addr, SCR1); + __ b(done); + + __ bind(runtime); + __ pushad(); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + __ popad(); + __ bind(done); + + } + break; +#endif + + case predicate_failed_trap_id: + { + StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); + } + break; + + case dtrace_object_alloc_id: + { + // A0: object + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); + + restore_live_registers(sasm); + } + break; + + default: + { + StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); + __ li(A0, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); + __ should_not_reach_here(); + } + break; + } + } + return oop_maps; +} + +#undef __ + +const char *Runtime1::pd_name_for_address(address entry) { + Unimplemented(); + return 0; +} diff --git a/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp new file mode 100644 index 00000000000..df052a058c8 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the client compiler. +// (see c1_globals.hpp) + +#ifndef COMPILER2 +define_pd_global(bool, BackgroundCompilation, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); +define_pd_global(bool, UseOnStackReplacement, true ); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); +define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); +define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(bool, ProfileInterpreter, false); +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(bool, NeverActAsServerClassMachine, true ); +define_pd_global(uint64_t,MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // !COMPILER2 +define_pd_global(bool, UseTypeProfile, false); +define_pd_global(bool, RoundFPResults, true ); + +define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); +define_pd_global(bool, TwoOperandLIRForm, false ); + +define_pd_global(intx, SafepointPollOffset, 0 ); + +#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp new file mode 100644 index 00000000000..044b0d2536d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. +define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ResizeTLAB, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +#ifdef CC_INTERP +define_pd_global(bool, ProfileInterpreter, false); +#else +define_pd_global(bool, ProfileInterpreter, true); +#endif // CC_INTERP +define_pd_global(bool, TieredCompilation, true); +define_pd_global(intx, CompileThreshold, 10000); +define_pd_global(intx, BackEdgeThreshold, 100000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 3); +define_pd_global(intx, FLOATPRESSURE, 6); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +define_pd_global(intx, INTPRESSURE, 13); +define_pd_global(intx, InteriorEntryAlignment, 16); +define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, LoopUnrollLimit, 60); +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 128ULL*G); +define_pd_global(intx, RegisterCostAreaRatio, 16000); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoScheduling, false); +define_pd_global(bool, OptoBundling, false); + +define_pd_global(intx, ReservedCodeCacheSize, 48*M); +define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); + +// Heap related flags +define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp new file mode 100644 index 00000000000..c7bf590b60d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" + +// processor dependent initialization for LoongArch + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); +} diff --git a/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp new file mode 100644 index 00000000000..652f6c10926 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp new file mode 100644 index 00000000000..70a47fc7722 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp @@ -0,0 +1,167 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// Release the CompiledICHolder* associated with this call site is there is one. +void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + if (is_icholder_entry(call->destination())) { + NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value()); + InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data()); + } +} + +bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + return is_icholder_entry(call->destination()); +} + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { + address mark = cbuf.insts_mark(); // get mark within main instrs section + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); + if (base == NULL) return NULL; // CodeBuffer::expand failed + + // static stub relocation stores the instruction address of the call + __ relocate(static_stub_Relocation::spec(mark), 0); + + // Code stream for loading method may be changed. + __ ibar(0); + + // Rmethod contains methodOop, it should be relocated for GC + // static stub relocation also tags the methodOop in the code-stream. + __ mov_metadata(Rmethod, NULL); + // This is recognized as unresolved by relocs/nativeInst/ic code + + cbuf.set_insts_mark(); + __ patchable_jump(__ pc()); + // Update current stubs pointer and restore code_end. + __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { + return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size; +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 16; +} + +void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. 
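For orientation, the stub parsed next is the one emitted by emit_to_interp_stub above (a barrier/nop slot, then the Rmethod constant load, then the patchable jump), which is also why to_interp_stub_size() sums three instruction sizes. A standalone sketch of how those offsets line up, with placeholder sizes rather than the real NativeInstruction constants:

// Sketch only: mirrors how set_to_interpreted()/set_stub_to_clean() locate the
// pieces of the to-interpreter stub. The fields stand in for
// NativeInstruction::nop_instruction_size, NativeMovConstReg::instruction_size
// and NativeGeneralJump::instruction_size; the values used are illustrative.
struct ToInterpStubLayoutSketch {
  int nop_size;        // leading barrier/nop slot
  int mov_const_size;  // mov_metadata(Rmethod, ...) constant load
  int jump_size;       // trailing patchable jump

  int method_holder_offset() const { return nop_size; }                  // metadata load starts here
  int jump_offset() const          { return nop_size + mov_const_size; } // jump starts here
  int total_size() const           { return nop_size + mov_const_size + jump_size; }
};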
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef LOONGARCH64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + + assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), + "a) MT-unsafe modification of inline cache"); + assert(jump->jump_destination() == jump->instruction_address() || jump->jump_destination() == entry, + "b) MT-unsafe modification of inline cache"); + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef LOONGARCH64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + method_holder->set_data(0); + jump->set_jump_destination(jump->instruction_address()); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledStaticCall::verify() { + // Verify call. + NativeCall::verify(); + if (os::is_MP()) { + verify_alignment(); + } + + // Verify stub. + address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef LOONGARCH64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT diff --git a/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp new file mode 100644 index 00000000000..cb655401395 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP
+#define CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP
+
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+#ifdef TARGET_OS_ARCH_linux_loongarch
+# include "copy_linux_loongarch.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_solaris_loongarch
+# include "copy_solaris_loongarch.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_windows_loongarch
+# include "copy_windows_loongarch.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_bsd_loongarch
+# include "copy_bsd_loongarch.inline.hpp"
+#endif
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+
+// Template for atomic, element-wise copy.
+template <class T>
+static void copy_conjoint_atomic(const T* from, T* to, size_t count) {
+  if (from > to) {
+    while (count-- > 0) {
+      // Copy forwards
+      *to++ = *from++;
+    }
+  } else {
+    from += count - 1;
+    to += count - 1;
+    while (count-- > 0) {
+      // Copy backwards
+      *to-- = *from--;
+    }
+  }
+}
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  julong* to = (julong*) tohw;
+  julong v = ((julong) value << 32) | value;
+  while (count-- > 0) {
+    *to++ = v;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);
+}
+
+#endif //CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP
diff --git a/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp
new file mode 100644
index 00000000000..45d86f5bfed
--- /dev/null
+++ b/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +#ifndef CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP + + protected: + +#if 0 + address generate_asm_interpreter_entry(bool synchronized); + address generate_native_entry(bool synchronized); + address generate_abstract_entry(void); + address generate_math_entry(AbstractInterpreter::MethodKind kind); + address generate_empty_entry(void); + address generate_accessor_entry(void); + void lock_method(void); + void generate_stack_overflow_check(void); + + void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); + void generate_counter_overflow(Label* do_continue); +#endif + + void generate_more_monitors(); + void generate_deopt_handling(); + address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only + void generate_compute_interpreter_state(const Register state, + const Register prev_state, + const Register sender_sp, + bool native); // C++ interpreter only + +#endif // CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp new file mode 100644 index 00000000000..d6c0df3b77e --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/cppInterpreter.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef SHARK +#include "shark/shark_globals.hpp" +#endif + +#ifdef CC_INTERP + +// Routine exists to make tracebacks look decent in debugger +// while "shadow" interpreter frames are on stack. It is also +// used to distinguish interpreter frames. + +extern "C" void RecursiveInterpreterActivation(interpreterState istate) { + ShouldNotReachHere(); +} + +bool CppInterpreter::contains(address pc) { + Unimplemented(); +} + +#define STATE(field_name) Lstate, in_bytes(byte_offset_of(BytecodeInterpreter, field_name)) +#define __ _masm-> + +Label frame_manager_entry; +Label fast_accessor_slow_entry_path; // fast accessor methods need to be able to jmp to unsynchronized + // c++ interpreter entry point this holds that entry point label. + +static address unctrap_frame_manager_entry = NULL; + +static address interpreter_return_address = NULL; +static address deopt_frame_manager_return_atos = NULL; +static address deopt_frame_manager_return_btos = NULL; +static address deopt_frame_manager_return_itos = NULL; +static address deopt_frame_manager_return_ltos = NULL; +static address deopt_frame_manager_return_ftos = NULL; +static address deopt_frame_manager_return_dtos = NULL; +static address deopt_frame_manager_return_vtos = NULL; + +const Register prevState = G1_scratch; + +void InterpreterGenerator::save_native_result(void) { + Unimplemented(); +} + +void InterpreterGenerator::restore_native_result(void) { + Unimplemented(); +} + +// A result handler converts/unboxes a native call result into +// a java interpreter/compiler result. The current frame is an +// interpreter frame. The activation frame unwind code must be +// consistent with that of TemplateTable::_return(...). In the +// case of native methods, the caller's SP was not modified. 
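The comment above describes what a result handler does in general terms; as a rough illustration of the "unbox/normalize" step it mentions (a generic sketch only, not code from this port, which leaves all of these CC_INTERP entries Unimplemented):

#include <cstdint>

// Sketch only: a native method returns its result in a raw register; for a
// boolean result the handler canonicalizes "any non-zero value" to 1 before
// the interpreter or compiler consumes it.
typedef uint8_t jboolean_sketch;

static jboolean_sketch normalize_boolean_result(intptr_t raw_native_result) {
  return raw_native_result != 0 ? 1 : 0;
}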
+address CppInterpreterGenerator::generate_result_handler_for(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreter::return_entry(TosState state, int length) { + Unimplemented(); +} + +address CppInterpreter::deopt_entry(TosState state, int length) { + Unimplemented(); +} + +void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { + Unimplemented(); +} + +address InterpreterGenerator::generate_empty_entry(void) { + Unimplemented(); +} + +address InterpreterGenerator::generate_accessor_entry(void) { + Unimplemented(); +} + +address InterpreterGenerator::generate_native_entry(bool synchronized) { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_compute_interpreter_state(const Register state, + const Register prev_state, + bool native) { + Unimplemented(); +} + +void InterpreterGenerator::lock_method(void) { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_deopt_handling() { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_more_monitors() { + Unimplemented(); +} + + +static address interpreter_frame_manager = NULL; + +void CppInterpreterGenerator::adjust_callers_stack(Register args) { + Unimplemented(); +} + +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + Unimplemented(); +} + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : CppInterpreterGenerator(code) { + Unimplemented(); +} + + +static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { + Unimplemented(); +} + +int AbstractInterpreter::size_top_interpreter_activation(methodOop method) { + Unimplemented(); +} + +void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill, + frame* caller, + frame* current, + methodOop method, + intptr_t* locals, + intptr_t* stack, + intptr_t* stack_base, + intptr_t* monitor_base, + intptr_t* frame_bottom, + bool is_top_frame + ) +{ + Unimplemented(); +} + +void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp) { + Unimplemented(); +} + + +int AbstractInterpreter::layout_activation(methodOop method, + int tempcount, // Number of slots on java expression stack in use + int popframe_extra_args, + int moncount, // Number of active monitors + int callee_param_size, + int callee_locals_size, + frame* caller, + frame* interpreter_frame, + bool is_top_frame) { + Unimplemented(); +} + +#endif // CC_INTERP diff --git a/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp new file mode 100644 index 00000000000..50de03653b1 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nmethod.hpp" +#include "runtime/frame.hpp" +#include "runtime/init.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" +#include "utilities/top.hpp" + +#ifndef PRODUCT + +void pd_ps(frame f) { + intptr_t* sp = f.sp(); + intptr_t* prev_sp = sp - 1; + intptr_t *pc = NULL; + intptr_t *next_pc = NULL; + int count = 0; + tty->print("register window backtrace from %#lx:\n", p2i(sp)); +} + +// This function is used to add platform specific info +// to the error reporting code. + +void pd_obfuscate_location(char *buf,int buflen) {} + +#endif // PRODUCT diff --git a/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp new file mode 100644 index 00000000000..62478be3dc8 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/disassembler.hpp" +#include "depChecker_loongarch.hpp" + +// Nothing to do on LoongArch diff --git a/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp new file mode 100644 index 00000000000..598be0ee6f4 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP + +// Nothing to do on LoongArch + +#endif // CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp new file mode 100644 index 00000000000..ccd89e8d6d2 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return "gpr-names=64"; + } + +#endif // CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp new file mode 100644 index 00000000000..0f50a5715de --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp @@ -0,0 +1,711 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_loongarch.inline.hpp" + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support +// for Profiling - acting on another frame. walks sender frames +// if valid. +// frame profile_find_Java_sender_frame(JavaThread *thread); + +bool frame::safe_for_sender(JavaThread *thread) { + address sp = (address)_sp; + address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) + bool sp_safe = (sp < thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size); + + + if (!sp_safe) { + return false; + } + + // unextended sp must be within the stack and above or equal sp + bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && + (unextended_sp >= sp); + + if (!unextended_sp_safe) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL ) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. 
+ + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. + return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address) this->fp()[return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[link_offset]; + + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; + } + + sender_sp = _unextended_sp + _cb->frame_size(); + sender_unextended_sp = sender_sp; + // On LA the return_address is always the word on the stack + sender_pc = (address) *(sender_sp-1); + // Note: frame::sender_sp_offset is only valid for compiled frame + saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // FP is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP + // is really a frame pointer. 
+ + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); + + return jcw_safe; + } + + if (sender_blob->is_nmethod()) { + nmethod* nm = sender_blob->as_nmethod_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { + return false; + } + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_nmethod(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + + if (!sender_blob->is_nmethod()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + // Note: fp == NULL is not really a prerequisite for this to be safe to + // walk for c2. However we've modified the code such that if we get + // a failure with fp != NULL that we then try with FP == NULL. + // This is basically to mimic what a last_frame would look like if + // c2 had generated it. + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + + if ( (address) this->fp()[return_addr_offset] == NULL) return false; + + + // could try and do some more potential verification of native frame if we could think of some... 
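The pointer-sanity tests used throughout safe_for_sender() above (sp_safe, unextended_sp_safe, the saved_fp_safe checks) all share one shape; a minimal standalone sketch with stand-in types and illustrative names:

#include <cstddef>

typedef unsigned char* address_t;  // stand-in for HotSpot's address

// Sketch only: stacks grow down, so a pointer is inside the usable stack when
// it lies below stack_base (the high end) and above the guard pages.
static bool within_usable_stack(address_t p, address_t stack_base, size_t usable_size) {
  return p < stack_base && p >= stack_base - usable_size;
}

// Sketch only: a saved frame pointer is plausible when it is on the stack and
// strictly above the sender's sp, mirroring the saved_fp_safe tests above.
static bool plausible_saved_fp(address_t saved_fp, address_t stack_base, address_t sender_sp) {
  return saved_fp < stack_base && saved_fp > sender_sp;
}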
+ + return true; + +} + +void frame::patch_pc(Thread* thread, address pc) { + assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + + // Either the return address is the original one or we are going to + // patch in the same address that's already there. + assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + _cb = CodeCache::find_blob(pc); + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +#ifdef CC_INTERP +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + // QQQ why does this specialize method exist if frame::sender_sp() does same thing? + // seems odd and if we always know interpreted vs. non then sender_sp() is really + // doing too much work. + return get_interpreterState()->sender_sp(); +} + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return get_interpreterState()->monitor_base(); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + return (BasicObjectLock*) get_interpreterState()->stack_base(); +} + +#else // CC_INTERP + +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the pointer points inside the frame + assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); + assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; +} +#endif // CC_INTERP + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from 
C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + if (jfa->last_Java_pc() != NULL ) { + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; + } + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); + return fr; +} + +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // sp is the raw sp from the sender after adapter or interpreter extension + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). + intptr_t* unextended_sp = interpreter_frame_sender_sp(); + + // The interpreter and compiler(s) always save FP in a known + // location on entry. We must record where that location is + // so this if FP was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves FP if we record where it is then + // we don't have to always save FP on entry and exit to c2 compiled + // code, on entry will be enough. +#ifdef COMPILER2 + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } +#endif /* COMPILER2 */ + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + + +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. The unextended SP might also be the saved SP +// for MethodHandle call sites. +#ifdef ASSERT +void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. + fr._unextended_sp = unextended_sp; + + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); + assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); +} +#endif + + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { + // On LoongArch, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); + if (sender_nm != NULL) { + // If the sender PC is a deoptimization point, get the original PC. + if (sender_nm->is_deopt_entry(_pc) || + sender_nm->is_deopt_mh_entry(_pc)) { + DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp)); + } + } +} + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // The interpreter and compiler(s) always save fp in a known + // location on entry. We must record where that location is + // so that if fp was live on callout from c2 we can find + // the saved copy no matter what it called. 
+ + // Since the interpreter always saves fp if we record where it is then + // we don't have to always save fp on entry and exit to c2 compiled + // code, on entry will be enough. + map->set_location(FP->as_VMReg(), (address) link_addr); + // this is weird "H" ought to be at a higher address however the + // oopMaps seems to have the "H" regs at the same address and the + // vanilla register. + // XXXX make this go away + if (true) { + map->set_location(FP->as_VMReg()->next(), (address) link_addr); + } +} + +//------------------------------sender_for_compiled_frame----------------------- +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + + // frame owned by optimizing compiler + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + + intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = sender_sp; + +#ifdef ASSERT + const bool c1_compiled = _cb->is_compiled_by_c1(); + bool native = _cb->is_nmethod() && ((nmethod*)_cb)->is_native_method(); + if (c1_compiled && native) { + assert(sender_sp == fp() + frame::sender_sp_offset, "incorrect frame size"); + } +#endif // ASSERT + // On Intel the return_address is always the word on the stack + // the fp in compiler points to sender fp, but in interpreter, fp points to return address, + // so getting sender for compiled frame is not same as interpreter frame. + // we hard code here temporarily + // spark + address sender_pc = (address) *(sender_sp-1); + + intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of epb there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + assert(sender_sp != sp(), "must have changed"); + return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + if (is_interpreted_frame()) return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. 
+ return frame(sender_sp(), link(), sender_pc()); +} + + +bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { + assert(is_interpreted_frame(), "must be interpreter frame"); + Method* method = interpreter_frame_method(); + // When unpacking an optimized frame the frame pointer is + // adjusted with: + int diff = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + printf("^^^^^^^^^^^^^^^adjust fp in deopt fp = 0%lx \n", (intptr_t)(fp - diff)); + return _fp == (fp - diff); +} + +void frame::pd_gc_epilog() { + // nothing done here now +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { +// QQQ +#ifdef CC_INTERP +#else + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + + // first the method + + Method* m = *interpreter_frame_method_addr(); + + // validate the method we'd find in this potential sender + if (!m->is_valid_method()) return false; + + // stack frames shouldn't be much larger than max_stack elements + + //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { + if (fp() - sp() > 4096) { // stack frames shouldn't be large. + return false; + } + + // validate bci/bcx + + intptr_t bcx = interpreter_frame_bcx(); + if (m->validate_bci_from_bcx(bcx) < 0) { + return false; + } + + // validate ConstantPoolCache* + + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + + if (cp == NULL || !cp->is_metaspace_object()) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + + if (locals > thread->stack_base() || locals < (address) fp()) return false; + + // We'd have to be pretty unlucky to be mislead at this point + +#endif // CC_INTERP + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { +#ifdef CC_INTERP + // Needed for JVMTI. The result should always be in the interpreterState object + assert(false, "NYI"); + interpreterState istate = get_interpreterState(); +#endif // CC_INTERP + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr; + if (method->is_native()) { + // Prior to calling into the runtime to report the method_exit the possible + // return value is pushed to the native stack. If the result is a jfloat/jdouble + // then ST0 is saved. See the note in generate_native_result + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + tos_addr += 2; + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { +#ifdef CC_INTERP + obj = istate->_oop_temp; +#else + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); +#endif // CC_INTERP + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; + } + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdx); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcx); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} +#endif + +intptr_t *frame::initial_deoptimization_info() { + // used to reset the saved FP + return fp(); +} + +intptr_t* frame::real_fp() const { + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(! is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} +#endif diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp new file mode 100644 index 00000000000..964026e6219 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp @@ -0,0 +1,229 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP + +#include "runtime/synchronizer.hpp" +#include "utilities/top.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, fp, sp} +// ------------------------------ Asm interpreter ---------------------------------------- +// Layout of asm interpreter frame: +// [expression stack ] * <- sp +// [monitors ] \ +// ... | monitor block size +// [monitors ] / +// [monitor block size ] +// [byte code index/pointr] = bcx() bcx_offset +// [pointer to locals ] = locals() locals_offset +// [constant pool cache ] = cache() cache_offset +// [methodData ] = mdp() mdx_offset +// [methodOop ] = method() method_offset +// [last sp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset +// [old frame pointer ] <- fp = link() +// [return pc ] +// [oop temp ] (only for native calls) +// [locals and parameters ] +// <- sender sp +// ------------------------------ Asm interpreter ---------------------------------------- + +// ------------------------------ C++ interpreter ---------------------------------------- +// +// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) +// +// <- SP (current sp) +// [local variables ] BytecodeInterpreter::run local variables +// ... BytecodeInterpreter::run local variables +// [local variables ] BytecodeInterpreter::run local variables +// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] +// [return pc ] (return to frame manager) +// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- +// [expression stack ] <- last_Java_sp | +// [... ] * <- interpreter_state.stack | +// [expression stack ] * <- interpreter_state.stack_base | +// [monitors ] \ | +// ... | monitor block size | +// [monitors ] / <- interpreter_state.monitor_base | +// [struct interpretState ] <-----------------------------------------| +// [return pc ] (return to callee of frame manager [1] +// [locals and parameters ] +// <- sender sp + +// [1] When the c++ interpreter calls a new method it returns to the frame +// manager which allocates a new frame on the stack. In that case there +// is no real callee of this newly allocated frame. The frame manager is +// aware of the additional frame(s) and will pop them as nested calls +// complete. Howevers tTo make it look good in the debugger the frame +// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation +// with a fake interpreter_state* parameter to make it easy to debug +// nested calls. + +// Note that contrary to the layout for the assembly interpreter the +// expression stack allocated for the C++ interpreter is full sized. +// However this is not as bad as it seems as the interpreter frame_manager +// will truncate the unused space on succesive method calls. +// +// ------------------------------ C++ interpreter ---------------------------------------- + +// Layout of interpreter frame: +// +// [ monitor entry ] <--- sp +// ... 
+// [ monitor entry ] +// -9 [ monitor block top ] ( the top monitor entry ) +// -8 [ byte code pointer ] (if native, bcp = 0) +// -7 [ constant pool cache ] +// -6 [ methodData ] mdx_offset(not core only) +// -5 [ mirror ] +// -4 [ methodOop ] +// -3 [ locals offset ] +// -2 [ last_sp ] +// -1 [ sender's sp ] +// 0 [ sender's fp ] <--- fp +// 1 [ return address ] +// 2 [ oop temp offset ] (only for native calls) +// 3 [ result handler offset ] (only for native calls) +// 4 [ result type info ] (only for native calls) +// [ local var m-1 ] +// ... +// [ local var 0 ] +// [ argumnet word n-1 ] <--- ( sender's sp ) +// ... +// [ argument word 0 ] <--- S7 + + public: + enum { + pc_return_offset = 0, + // All frames + link_offset = 0, + return_addr_offset = 1, + // non-interpreter frames + sender_sp_offset = 2, + +#ifndef CC_INTERP + + // Interpreter frames + interpreter_frame_return_addr_offset = 1, + interpreter_frame_result_handler_offset = 3, // for native calls only + interpreter_frame_oop_temp_offset = 2, // for native calls only + + interpreter_frame_sender_fp_offset = 0, + interpreter_frame_sender_sp_offset = -1, + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_mdx_offset = interpreter_frame_method_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mdx_offset - 1, + interpreter_frame_bcx_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcx_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + +#endif // CC_INTERP + + // Entry frames + entry_frame_call_wrapper_offset = -9, + + // Native frames + + native_frame_initial_param_offset = 2 + + }; + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and _pc: + intptr_t* _fp; // frame pointer + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. 
+ + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); + static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { + verify_deopt_original_pc(nm, unextended_sp, true); + } +#endif + + public: + // Constructors + + frame(intptr_t* sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* fp); + + void init(intptr_t* sp, intptr_t* fp, address pc); + + // accessors for the instance variables + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + + // return address of param, zero origin index. + inline address* native_param_addr(int idx) const; + + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + + // helper to update a map with callee-saved FP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + +#ifndef CC_INTERP + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* sp); +#endif // CC_INTERP + +#ifdef CC_INTERP + inline interpreterState get_interpreterState() const; +#endif // CC_INTERP + +#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp new file mode 100644 index 00000000000..3d22339ad7d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp @@ -0,0 +1,312 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP + +#include "code/codeCache.hpp" + +// Inline functions for Loongson frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = (address)(sp[-1]); + + // Here's a sticky one. This constructor can be called via AsyncGetCallTrace + // when last_Java_sp is non-null but the pc fetched is junk. If we are truly + // unlucky the junk value could be to a zombied method and we'll die on the + // find_blob call. This is also why we can have no asserts on the validity + // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler + // -> pd_last_frame should use a specialized version of pd_last_frame which could + // call a specilaized frame constructor instead of this one. + // Then we could use the assert below. However this assert is of somewhat dubious + // value. + // assert(_pc != NULL, "no pc?"); + + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && unextended_sp() == other.unextended_sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
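+// For example (assuming the usual downward-growing stack): the id is the frame's
+// unextended sp, so if frame A calls frame B then B.id() < A.id(); hence
+// B.is_younger(A.id()) and A.is_older(B.id()) both hold.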
+inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Relationals on frames based +// Return true if the frame is younger (more recent activation) than the frame represented by id +inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() < id ; } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + + + +inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } +inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: + +inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } +inline address frame::sender_pc() const { return *sender_pc_addr(); } + +// return address of param, zero origin index. +inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); } + +#ifdef CC_INTERP + +inline interpreterState frame::get_interpreterState() const { + return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize )); +} + +inline intptr_t* frame::sender_sp() const { + // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames? + if (is_interpreted_frame()) { + assert(false, "should never happen"); + return get_interpreterState()->sender_sp(); + } else { + return addr_at(sender_sp_offset); + } +} + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_locals); +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return (intptr_t*) &(get_interpreterState()->_bcp); +} + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_constants); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_method); +} + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return (intptr_t*) &(get_interpreterState()->_mdx); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + assert(is_interpreted_frame(), "wrong frame type"); + return get_interpreterState()->_stack + 1; +} + +#else // asm interpreter +inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + return (intptr_t*)addr_at(interpreter_frame_bcx_offset); +} + + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + return (intptr_t*)addr_at(interpreter_frame_mdx_offset); +} + + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const 
{ + return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(interpreter_frame_method_offset); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL ) { + return sp(); + } else { + // sp() may have been extended by an adapter + assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +#endif // CC_INTERP + +inline int frame::pd_oop_map_offset_adjustment() const { + return 0; +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + + +inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } + + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + +// Compiled frames + +inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { + return (nof_args - local_index + (local_index < nof_args ? 1: -1)); +} + +inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { + return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors); +} + +inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) { + return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1); +} + +inline bool frame::volatile_across_calls(Register reg) { + return true; +} + + + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*) map->location(V0->as_VMReg())); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*) map->location(V0->as_VMReg())) = obj; +} + +#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp new file mode 100644 index 00000000000..f9f93b9e657 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP +// Size of LoongArch Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = (2*wordSize); + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are properly extended to 64 bits. +// If set, SharedRuntime::c_calling_convention() must adapt +// signatures accordingly. +const bool CCallingConventionRequiresIntsAsLongs = false; + +#define SUPPORTS_NATIVE_CX8 + +#endif // CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp new file mode 100644 index 00000000000..182be608a30 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +#ifdef CORE +define_pd_global(bool, UseSSE, 0); +#endif /* CORE */ +define_pd_global(bool, ConvertSleepToYield, true); +define_pd_global(bool, ShareVtableStubs, true); +define_pd_global(bool, CountInterpCalls, true); + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. 
+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast +define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + +define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 2000); + +define_pd_global(uintx, TLABSize, 0); +define_pd_global(uintx, NewSize, 1024 * K); +define_pd_global(intx, PreInflateSpin, 10); + +define_pd_global(intx, PrefetchFieldsAhead, -1); + +define_pd_global(intx, StackYellowPages, 2); +define_pd_global(intx, StackRedPages, 1); +define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); +define_pd_global(bool, UseMembar, true); +// GC Ergo Flags +define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, PreserveFramePointer, false); +// Only c2 cares about this at the moment +define_pd_global(intx, AllocatePrefetchStyle, 2); +define_pd_global(intx, AllocatePrefetchDistance, -1); + +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ + \ + product(bool, UseCodeCacheAllocOpt, true, \ + "Allocate code cache within 32-bit memory address space") \ + \ + product(bool, UseLSX, false, \ + "Use LSX 128-bit vector instructions") \ + \ + product(bool, UseLASX, false, \ + "Use LASX 256-bit vector instructions") \ + \ + product(intx, UseSyncLevel, 10000, \ + "The sync level on Loongson CPUs" \ + "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ + "UseSyncLevel == 4000, 101, maybe for GS464V" \ + "UseSyncLevel == 3000, 001, maybe for GS464V" \ + "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ + "UseSyncLevel == 1000, 110, maybe for GS464") \ + \ + product(bool, UseUnalignedAccesses, false, \ + "Use unaligned memory accesses in Unsafe") \ + \ + product(bool, UseCRC32, false, \ + "Use CRC32 instructions for CRC32 computation") \ + \ + product(bool, UseActiveCoresMP, false, \ + "Eliminate barriers for single active cpu") + +#endif // CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp new file mode 100644 index 00000000000..8c782253462 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/oop.inline.hpp" +#include "oops/oop.inline2.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +int InlineCacheBuffer::ic_stub_code_size() { + return NativeMovConstReg::instruction_size + + NativeGeneralJump::instruction_size + + 1; + // so that code_end can be set in CodeBuffer + // 64bit 15 = 6 + 8 bytes + 1 byte + // 32bit 7 = 2 + 4 bytes + 1 byte +} + + +// we use T1 as cached oop(klass) now. this is the target of virtual call, +// when reach here, the receiver in T0 +// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, + address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // note: even though the code contains an embedded oop, we do not need reloc info + // because + // (1) the oop is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); +#define __ masm-> + __ patchable_li52(T1, (long)cached_value); + // TODO: confirm reloc + __ jmp(entry_point, relocInfo::runtime_call_type); + __ flush(); +#undef __ +} + + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + return jump->jump_destination(); +} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // creation also verifies the object + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); + // Verifies the jump + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + void* o= (void*)move->data(); + return o; +} diff --git a/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp new file mode 100644 index 00000000000..d577e41f59c --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) +{ +#define __ _masm-> + StubCodeMark mark(this, "ICache", "flush_icache_stub"); + address start = __ pc(); + + __ ibar(0); + __ ori(V0, RA2, 0); + __ jr(RA); + + *flush_icache_stub = (ICache::flush_icache_stub_t)start; +#undef __ +} diff --git a/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp new file mode 100644 index 00000000000..15e45cb3508 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. + +class ICache : public AbstractICache { + public: + enum { + stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes + line_size = 32, // flush instruction affects a dword + log2_line_size = 5 // log2(line_size) + }; +}; + +#endif // CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp new file mode 100644 index 00000000000..8c84f21511b --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp @@ -0,0 +1,1960 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interp_masm_loongarch_64.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Implementation of InterpreterMacroAssembler + +#ifdef CC_INTERP +void InterpreterMacroAssembler::get_method(Register reg) { +} +#endif // CC_INTERP + +void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { + if (UseUnalignedAccesses) { + ld_hu(reg, BCP, offset); + } else { + ld_bu(reg, BCP, offset); + ld_bu(tmp, BCP, offset + 1); + bstrins_d(reg, tmp, 15, 8); + } +} + +void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { + if (UseUnalignedAccesses) { + ld_wu(reg, BCP, offset); + } else { + ldr_w(reg, BCP, offset); + ldl_w(reg, BCP, offset + 3); + lu32i_d(reg, 0); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) { + // interpreter specific + // + // Note: No need to save/restore bcp & locals pointer + // since these are callee saved registers and no blocking/ + // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has + // already saved them so that it can use BCP/LVP as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). Which caused bizarre failures + // when jvm built with ASSERTs. 
+#ifdef ASSERT + save_bcp(); + { + Label L; + ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT,R0,L); + stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); + bind(L); + } +#endif + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); + // interpreter specific + // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save them here (see note above) the assert is invalid. +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway. Btw., bcp must be saved/restored since it may change + // due to GC. + assert(java_thread == noreg , "not expecting a precomputed java thread"); + save_bcp(); +#ifdef ASSERT + { + Label L; + ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT, R0, L); + stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); + // interpreter specific + restore_bcp(); + restore_locals(); +} + + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. If the flag has the popframe_processing bit set, it + // means that this code is called *during* popframe handling - we + // don't want to reenter. + // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. 
+ // Not clear if any other register is available, so load AT twice + assert(AT != java_thread, "check"); + ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_pending_bit); + beq(AT, R0, L); + + ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_processing_bit); + bne(AT, R0, L); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + jr(V0); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + Register thread = T8; +#ifndef OPT_THREAD + get_thread(thread); +#else + move(T8, TREG); +#endif + ld_ptr(thread, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address tos_addr (thread, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + const Address oop_addr (thread, in_bytes(JvmtiThreadState::earlyret_oop_offset())); + const Address val_addr (thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + //V0, oop_addr,V1,val_addr + switch (state) { + case atos: + ld_ptr(V0, oop_addr); + st_ptr(R0, oop_addr); + verify_oop(V0, state); + break; + case ltos: + ld_ptr(V0, val_addr); // fall through + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + ld_w(V0, val_addr); + break; + case ftos: + fld_s(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case dtos: + fld_d(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + li(AT, (int)ilgl); + st_w(AT, tos_addr); + st_w(R0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + Register tmp = T4; + + assert(java_thread != AT, "check"); + assert(java_thread != tmp, "check"); + ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + beq(AT, R0, L); + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); + li(tmp, JvmtiThreadState::earlyret_pending); + bne(tmp, AT, L); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. 
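+    // The chosen entry depends on the pending TosState (earlyret_tos): it is read
+    // from the JvmtiThreadState, passed in A0, and the leaf call returns the matching
+    // entry address in V0, which is then jumped to directly.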
+ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + ld_w(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + move(A0, AT); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); + jr(V0); + bind(L); + } +} + + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, + int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + ld_bu(AT, BCP, bcp_offset); + ld_bu(reg, BCP, bcp_offset + 1); + bstrins_w(reg, AT, 15, 8); +} + + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + get_2_byte_integer_at_bcp(index, AT, bcp_offset); + } else if (index_size == sizeof(u4)) { + assert(EnableInvokeDynamic, "giant index used only for JSR 292"); + get_4_byte_integer_at_bcp(index, bcp_offset); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + nor(index, index, R0); + slli_w(index, index, 0); + } else if (index_size == sizeof(u1)) { + ld_bu(index, BCP, bcp_offset); + } else { + ShouldNotReachHere(); + } +} + + +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, index); + get_cache_index_at_bcp(index, bcp_offset, index_size); + ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); + shl(index, 2); +} + + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. 
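+  // Sketch of the addressing below (assuming alsl_d(rd, rj, rk, sa) computes
+  // rd = (rj << (sa + 1)) + rk): index was already scaled to word units by
+  // get_cache_and_index_at_bcp(), so
+  //   AT = cache + index * wordSize   // start of the ConstantPoolCacheEntry
+  // and the 32-bit load then reads the indices field out of that entry.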
+ alsl_d(AT, index, cache, Address::times_ptr - 1); + ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); + if(os::is_MP()) { + membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); + } + + const int shift_count = (1 + byte_no) * BitsPerByte; + assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || + (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), + "correct shift count"); + srli_d(bytecode, bytecode, shift_count); + assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); + li(AT, ConstantPoolCacheEntry::bytecode_1_mask); + andr(bytecode, bytecode, AT); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert(cache != tmp, "must use different register"); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + shl(tmp, 2 + LogBytesPerWord); + ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + // skip past the header + addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + add_d(cache, cache, tmp); +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ld_d(mcs, method, in_bytes(Method::method_counters_offset())); + bne(mcs, R0, has_counters); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ld_d(mcs, method, in_bytes(Method::method_counters_offset())); + beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory + bind(has_counters); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index) { + assert_different_registers(result, index); + // convert from field index to resolved_references() index and from + // word index to byte offset. Since this is a java object, it can be compressed + Register tmp = index; // reuse + shl(tmp, LogBytesPerHeapOop); + + get_constant_pool(result); + // load pointer for resolved_references[] objArray + ld_d(result, result, ConstantPool::resolved_references_offset_in_bytes()); + // JNIHandles::resolve(obj); + ld_d(result, result, 0); //? is needed? + // Add in the index + add_d(result, result, tmp); + load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +// Resets LVP to locals. Register sub_klass cannot be any of the above. +void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { + + assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); + assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); + assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); + // Profile the not-null value's klass. + // Here T4 and T1 are used as temporary registers. + profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 + + // Do the check. 
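+  // check_klass_subtype() branches to ok_is_subtype on success; on failure it falls
+  // through here so the failed check can be profiled before the slow path is taken.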
+ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 + + // Profile the failure of the check. + profile_typecheck_failed(T4); // blows T4 + +} + + + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + ld_d(r, SP, 0); + addi_d(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + ld_w(r, SP, 0); + addi_d(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + ld_d(r, SP, 0); + addi_d(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + fld_s(r, SP, 0); + addi_d(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + fld_d(r, SP, 0); + addi_d(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + addi_d(SP, SP, - Interpreter::stackElementSize); + st_d(r, SP, 0); +} + +void InterpreterMacroAssembler::push_i(Register r) { + // For compatibility reason, don't change to sw. + addi_d(SP, SP, - Interpreter::stackElementSize); + st_d(r, SP, 0); +} + +void InterpreterMacroAssembler::push_l(Register r) { + addi_d(SP, SP, -2 * Interpreter::stackElementSize); + st_d(r, SP, 0); + st_d(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + addi_d(SP, SP, - Interpreter::stackElementSize); + fst_s(r, SP, 0); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + addi_d(SP, SP, -2 * Interpreter::stackElementSize); + fst_d(r, SP, 0); + st_d(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: pop_i(); break; + case ltos: pop_l(); break; + case ftos: pop_f(); break; + case dtos: pop_d(); break; + case vtos: /* nothing to do */ break; + default: ShouldNotReachHere(); + } + verify_oop(FSR, state); +} + +//FSR=V0,SSR=V1 +void InterpreterMacroAssembler::push(TosState state) { + verify_oop(FSR, state); + switch (state) { + case atos: push_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: push_f(); break; + case dtos: push_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { + // record last_sp + move(Rsender, SP); + st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + if (JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. +#ifndef OPT_THREAD + get_thread(temp); +#else + move(temp, TREG); +#endif + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? 
+ ld_w(AT, temp, in_bytes(JavaThread::interp_only_mode_offset())); + beq(AT, R0, run_compiled_code); + ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); + jr(AT); + bind(run_compiled_code); + } + + ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); + jr(AT); +} + + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. LoongArch64 does not do this. +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { + // Nothing LoongArch64 specific to be done here +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +// assume the next bytecode in T8. +void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop) { + if (VerifyActivationFrameSize) { + Label L; + + sub_d(T2, FP, SP); + int min_frame_size = (frame::link_offset - + frame::interpreter_frame_initial_sp_offset) * wordSize; + addi_d(T2, T2, -min_frame_size); + bge(T2, R0, L); + stop("broken stack frame"); + bind(L); + } + // FIXME: I do not know which register should pass to verify_oop + if (verifyoop) verify_oop(FSR, state); + + if((long)table >= (long)Interpreter::dispatch_table(btos) && + (long)table <= (long)Interpreter::dispatch_table(vtos)) { + int table_size = (long)Interpreter::dispatch_table(itos) - + (long)Interpreter::dispatch_table(stos); + int table_offset = ((int)state - (int)itos) * table_size; + + // S8 points to the starting address of Interpreter::dispatch_table(itos). + // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. + if (table_offset != 0) { + if (is_simm(table_offset, 12)) { + alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); + ld_d(T3, T3, table_offset); + } else { + li(T2, table_offset); + alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); + ldx_d(T3, T2, T3); + } + } else { + slli_d(T2, Rnext, LogBytesPerWord); + ldx_d(T3, S8, T2); + } + } else { + li(T3, (long)table); + slli_d(T2, Rnext, LogBytesPerWord); + ldx_d(T3, T2, T3); + } + jr(T3); +} + +void InterpreterMacroAssembler::dispatch_only(TosState state) { + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { + dispatch_base(state, Interpreter::normal_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { + dispatch_base(state, Interpreter::normal_table(state), false); +} + + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { + // load next bytecode + ld_bu(Rnext, BCP, step); + increment(BCP, step); + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + ld_bu(Rnext, BCP, 0); + dispatch_base(state, table); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from syncronized blocks. +// Remove the activation from the stack. 
+// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +// used registers : T1, T2, T3, T8 +// T1 : thread, method access flags +// T2 : monitor entry pointer +// T3 : method, monitor top +// T8 : unlock flag +void InterpreterMacroAssembler::remove_activation( + TosState state, + Register ret_addr, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { + // Note: Registers V0, V1 and F0, F1 may be in use for the result + // check if synchronized method + Label unlocked, unlock, no_unlock; + + // get the value of _do_not_unlock_if_synchronized into T8 +#ifndef OPT_THREAD + Register thread = T1; + get_thread(thread); +#else + Register thread = TREG; +#endif + ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + // reset the flag + st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + // get method access flags + ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize); + ld_w(T1, T3, in_bytes(Method::access_flags_offset())); + andi(T1, T1, JVM_ACC_SYNCHRONIZED); + beq(T1, R0, unlocked); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. + bne(T8, R0, no_unlock); + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. + addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize + - (int)sizeof(BasicObjectLock)); + // address of first monitor + ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, unlock); + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + // I think LA do not need empty_FPU_stack + // remove possible return value from FPU-stack, otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. 
+ if (install_monitor_exception) { + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + + } + + b(unlocked); + } + + bind(unlock); + unlock_object(c_rarg0); + pop(state); + + // Check that for block-structured locking (i.e., that all locked + // objects has been unlocked) + bind(unlocked); + + // V0, V1: Might contain return value + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top(FP, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + + bind(restart); + // points to current entry, starting with top-most entry + ld_d(c_rarg0, monitor_block_top); + // points to word before bottom of monitor block + addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + b(entry); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception + // Unlock does not block, so don't have to worry about the frame + // We don't have to preserve c_rarg0, since we are going to + // throw an exception + + push(state); + unlock_object(c_rarg0); + pop(state); + + if (install_monitor_exception) { + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + + b(restart); + } + + bind(loop); + ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, exception);// check if current entry is used + + addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry + bind(entry); + bne(c_rarg0, T3, loop); // check if bottom reached + } + + bind(no_unlock); + + // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation + ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + ld_d(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); + ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); +} + +#endif // C_INTERP + +// Lock object +// +// Args: +// c_rarg0: BasicObjectLock to be used for locking +// +// Kills: +// T1 +// T2 +void InterpreterMacroAssembler::lock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + } else { + Label done, slow_case; + const Register tmp_reg = T2; + const Register scr_reg = T1; + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); + + // Load object pointer into scr_reg + ld_d(scr_reg, lock_reg, obj_offset); + + if (UseBiasedLocking) { + // Note: we use noreg for the temporary register since it's hard + // to come up with a free register on all incoming code paths + biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); + } + + // Load (object->mark() | 1) into tmp_reg + ld_d(AT, scr_reg, 0); + ori(tmp_reg, AT, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + st_d(tmp_reg, lock_reg, mark_offset); + + assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); + + if (PrintBiasedLockingStatistics) { + Label succ, fail; + cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); + bind(succ); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); + b(done); + bind(fail); + } else { + cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); + } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) SP <= mark < SP + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in tmp_reg as the result of cmpxchg + sub_d(tmp_reg, tmp_reg, SP); + li(AT, 7 - os::vm_page_size()); + andr(tmp_reg, tmp_reg, AT); + // Save the test result, for recursive case, the result is zero + st_d(tmp_reg, lock_reg, mark_offset); + if (PrintBiasedLockingStatistics) { + bnez(tmp_reg, slow_case); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); + } + beqz(tmp_reg, done); + + bind(slow_case); + // Call the runtime routine for slow case + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + + bind(done); + } +} + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg0: BasicObjectLock for lock +// +// Kills: +// T1 +// T2 +// T3 +// Throw an IllegalMonitorException if object is not locked by current thread +void InterpreterMacroAssembler::unlock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Label done; + const Register tmp_reg = T1; + const Register scr_reg = T2; + const Register hdr_reg = T3; + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock structure + // Store the BasicLock address into tmp_reg + addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); + + // Load oop into scr_reg + ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + // free entry + st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + if (UseBiasedLocking) { + biased_locking_exit(scr_reg, hdr_reg, done); + } + + // Load the old header from BasicLock structure + ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); + // zero for recursive case + beqz(hdr_reg, done); + + // Atomic swap back the old header + cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); + + // Call the runtime routine for slow case. + st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj + call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); + + bind(done); + + restore_bcp(); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld_d(mdp, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); + beq(mdp, R0, zero_continue); +} + + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + + // V0 and T0 will be used as two temporary registers. + push2(V0, T0); + + get_method(T0); + // Test MDO to avoid the call if it is NULL. + ld_d(V0, T0, in_bytes(Method::method_data_offset())); + beq(V0, R0, set_mdp); + + // method: T0 + // bcp: BCP --> S0 + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); + // mdi: V0 + // mdo is guaranteed to be non-zero here, we checked for it before the call. + get_method(T0); + ld_d(T0, T0, in_bytes(Method::method_data_offset())); + addi_d(T0, T0, in_bytes(MethodData::data_offset())); + add_d(V0, T0, V0); + bind(set_mdp); + st_d(V0, FP, frame::interpreter_frame_mdx_offset * wordSize); + pop2(T0, V0); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + Register method = T5; + Register mdp = T6; + Register tmp = A0; + push(method); + push(mdp); + push(tmp); + test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue + get_method(method); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
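+  // The fast check below recomputes the expected bcp from the mdp, roughly
+  //   expected_bcp = method->constMethod()->code_base() + header->bci()
+  // (header being the DataLayout the mdp points at), and only calls
+  // InterpreterRuntime::verify_mdp() when that does not match BCP.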
+ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); + ld_d(AT, method, in_bytes(Method::const_offset())); + add_d(tmp, tmp, AT); + addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); + beq(tmp, BCP, verify_continue); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); + bind(verify_continue); + pop(tmp); + pop(mdp); + pop(method); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + st_d(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + // Counter address + Address data(mdp_in, constant); + + increment_mdp_data_at(data, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Address data, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + Register tmp = S0; + push(tmp); + if (decrement) { + // Decrement the register. + ld_d(AT, data); + addi_d(tmp, AT, (int32_t) -DataLayout::counter_increment); + // If the decrement causes the counter to overflow, stay negative + Label L; + blt(tmp, R0, L); + addi_d(tmp, tmp, (int32_t) DataLayout::counter_increment); + bind(L); + st_d(tmp, data); + } else { + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + ld_d(AT, data); + // Increment the register. + addi_d(tmp, AT, DataLayout::counter_increment); + // If the increment causes the counter to overflow, pull back by 1. + slt(AT, tmp, R0); + sub_d(tmp, tmp, AT); + st_d(tmp, data); + } + pop(tmp); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) { + Register tmp = S0; + push(S0); + if (decrement) { + // Decrement the register. + add_d(AT, mdp_in, reg); + assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); + ld_d(AT, AT, constant); + + addi_d(tmp, AT, (int32_t) -DataLayout::counter_increment); + // If the decrement causes the counter to overflow, stay negative + Label L; + blt(tmp, R0, L); + addi_d(tmp, tmp, (int32_t) DataLayout::counter_increment); + bind(L); + + add_d(AT, mdp_in, reg); + st_d(tmp, AT, constant); + } else { + add_d(AT, mdp_in, reg); + assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); + ld_d(AT, AT, constant); + + // Increment the register. + addi_d(tmp, AT, DataLayout::counter_increment); + // If the increment causes the counter to overflow, pull back by 1. + slt(AT, tmp, R0); + sub_d(tmp, tmp, AT); + + add_d(AT, mdp_in, reg); + st_d(tmp, AT, constant); + } + pop(S0); +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int header_offset = in_bytes(DataLayout::header_offset()); + int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); + // Set the flag + ld_w(AT, Address(mdp_in, header_offset)); + if(Assembler::is_simm(header_bits, 12)) { + ori(AT, AT, header_bits); + } else { + push(T8); + // T8 is used as a temporary register. 
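+    // The flag mask does not fit in the 12-bit immediate of ori on this path,
+    // so it is first materialized into T8 with li() and OR'ed in register form.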
+ li(T8, header_bits); + orr(AT, AT, T8); + pop(T8); + } + st_w(AT, Address(mdp_in, header_offset)); +} + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + ld_d(AT, Address(mdp_in, offset)); + bne(AT, value, not_equal_continue); + } else { + // Put the test value into a register, so caller can use it: + ld_d(test_value_out, Address(mdp_in, offset)); + bne(value, test_value_out, not_equal_continue); + } +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); + ld_d(AT, mdp_in, offset_of_disp); + add_d(mdp_in, mdp_in, AT); + st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add_d(AT, reg, mdp_in); + assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); + ld_d(AT, AT, offset_of_disp); + add_d(mdp_in, mdp_in, AT); + st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if(Assembler::is_simm(constant, 12)) { + addi_d(mdp_in, mdp_in, constant); + } else { + li(AT, constant); + add_d(mdp_in, mdp_in, AT); + } + st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + push(return_bci); // save/restore across call_VM + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + pop(return_bci); +} + + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. + // We inline increment_mdp_data_at to return bumped_count in a register + //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); + ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + push(T8); + // T8 is used as a temporary register. + addi_d(T8, bumped_count, DataLayout::counter_increment); + slt(AT, T8, R0); + sub_d(bumped_count, T8, AT); + pop(T8); + st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. 
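+    // (The branch was not taken here, so it is BranchData::not_taken that gets
+    // bumped; the taken counter is handled in profile_taken_branch above.)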
+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + bnez(receiver, not_null); + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + b(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). +// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) +// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } + return; + } + + int last_row = VirtualCallData::row_limit() - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the receiver and for null. + // Take any of three different outcomes: + // 1. 
found receiver => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the receiver is receiver[n]. + int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); + test_mdp_data_at(mdp, recvr_offset, receiver, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the receiver from the CallData.) + + // The receiver is receiver[n]. Increment count[n]. + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); + increment_mdp_data_at(mdp, count_offset); + beq(R0, R0, done); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on receiver[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (is_virtual_call) { + beq(reg2, R0, found_null); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + beq(R0, R0, done); + bind(found_null); + } else { + bne(reg2, R0, done); + } + break; + } + // Since null is rare, make it be the branch-taken case. + beq(reg2, R0, found_null); + + // Put all the "Case 3" tests here. + record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); + + // Found a null. Keep searching for a matching receiver, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching receiver, but we + // observed the receiver[start_row] is NULL. + + // Fill in the receiver field and increment the count. 
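+  // In ReceiverTypeData terms the two set_mdp_data_at calls below amount to,
+  // roughly (illustrative, not generated code):
+  //   row[start_row].set_receiver(receiver);
+  //   row[start_row].set_count(DataLayout::counter_increment);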
+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); + set_mdp_data_at(mdp, recvr_offset, receiver); + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); + li(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + beq(R0, R0, done); + } +} + +// Example state machine code for three profile rows: +// // main copy of decision tree, rooted at row[1] +// if (row[0].rec == rec) { row[0].incr(); goto done; } +// if (row[0].rec != NULL) { +// // inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[1].rec != NULL) { +// // degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// if (row[2].rec != NULL) { goto done; } // overflow +// row[2].init(rec); goto done; +// } else { +// // remember row[1] is empty +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[1].init(rec); goto done; +// } +// } else { +// // remember row[0] is empty +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[0].init(rec); goto done; +// } +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); + + bind (done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, + Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + b(profile_continue); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. 
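+    // profile_typecheck has already advanced mdp by the VirtualCallData-sized
+    // entry, so step back by that size to reach the count cell that was bumped.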
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + li(reg2, in_bytes(MultiBranchData::per_case_size())); + mul_d(index, index, reg2); + addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type + ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); + ld_d(T4, T4, in_bytes(Method::const_offset())); + ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); + + Label done, notBool, notByte, notChar; + + // common case first + addi_d(AT, T4, -T_INT); + beq(AT, R0, done); + + // mask integer result to narrower return type. 
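+  // Illustrative intent of the masking below (not generated code):
+  //   T_BOOLEAN: result &= 1;        T_BYTE:            result = (jbyte)  result;
+  //   T_CHAR:    result &= 0xffff;   otherwise (T_SHORT): result = (jshort) result;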
+ addi_d(AT, T4, -T_BOOLEAN); + bne(AT, R0, notBool); + andi(result, result, 0x1); + beq(R0, R0, done); + + bind(notBool); + addi_d(AT, T4, -T_BYTE); + bne(AT, R0, notByte); + ext_w_b(result, result); + beq(R0, R0, done); + + bind(notByte); + addi_d(AT, T4, -T_CHAR); + bne(AT, R0, notChar); + bstrpick_d(result, result, 15, 0); + beq(R0, R0, done); + + bind(notChar); + ext_w_h(result, result); + + // Nothing to do for T_INT + bind(done); +} + + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { + Label update, next, none; + + verify_oop(obj); + + if (mdo_addr.index() != noreg) { + guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); + guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); + push(T0); + alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); + } + + bnez(obj, update); + + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::null_seen); + if (mdo_addr.index() == noreg) { + st_d(AT, mdo_addr); + } else { + st_d(AT, T0, mdo_addr.disp()); + } + + b(next); + + bind(update); + load_klass(obj, obj); + + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + + assert(TypeEntries::type_klass_mask == -4, "must be"); + bstrpick_d(AT, obj, 63, 2); + beqz(AT, next); + + andi(AT, obj, TypeEntries::type_unknown); + bnez(AT, next); + + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + beqz(AT, none); + + addi_d(AT, AT, -(TypeEntries::null_seen)); + beqz(AT, none); + + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + assert(TypeEntries::type_klass_mask == -4, "must be"); + bstrpick_d(AT, obj, 63, 2); + beqz(AT, next); + + // different than before. Cannot keep accurate profile. + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::type_unknown); + if (mdo_addr.index() == noreg) { + st_d(AT, mdo_addr); + } else { + st_d(AT, T0, mdo_addr.disp()); + } + b(next); + + bind(none); + // first time here. Set profile type. + if (mdo_addr.index() == noreg) { + st_d(obj, mdo_addr); + } else { + st_d(obj, T0, mdo_addr.disp()); + } + + bind(next); + if (mdo_addr.index() != noreg) { + pop(T0); + } +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); + li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + bne(tmp, AT, profile_continue); + + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + if (Assembler::is_simm(off_to_args, 12)) { + addi_d(mdp, mdp, off_to_args); + } else { + li(AT, off_to_args); + add_d(mdp, mdp, AT); + } + + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { + addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); + } else { + li(AT, i*TypeStackSlotEntries::per_arg_count()); + sub_w(tmp, tmp, AT); + } + + li(AT, TypeStackSlotEntries::per_arg_count()); + blt(tmp, AT, done); + } + ld_d(tmp, callee, in_bytes(Method::const_offset())); + + ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); + + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); + sub_d(tmp, tmp, AT); + + addi_w(tmp, tmp, -1); + + Address arg_addr = argument_address(tmp); + ld_d(tmp, arg_addr); + + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp, mdo_arg_addr); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + if (Assembler::is_simm(to_add, 12)) { + addi_d(mdp, mdp, to_add); + } else { + li(AT, to_add); + add_d(mdp, mdp, AT); + } + + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); + if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { + addi_w(tmp, tmp, -1 * tmp_arg_counts); + } else { + li(AT, tmp_arg_counts); + sub_w(mdp, mdp, AT); + } + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. + assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + slli_w(tmp, tmp, exact_log2(DataLayout::cell_size)); + add_d(mdp, mdp, tmp); + } + st_d(mdp, FP, frame::interpreter_frame_mdx_offset * wordSize); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, _bcp_register); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. 
We can't go back to the + // begining of the ProfileData we intend to update to check its + // type because we're right after it and we don't known its + // length + Label do_profile; + ld_b(tmp, _bcp_register, 0); + addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic); + beqz(AT, do_profile); + addi_d(AT, tmp, -1 * Bytecodes::_invokehandle); + beqz(AT, do_profile); + + get_method(tmp); + ld_b(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); + li(AT, vmIntrinsics::_compiledLambdaForm); + bne(tmp, AT, profile_continue); + + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + add_d(tmp, ret, R0); + profile_obj_type(tmp, mdo_ret_addr); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { + guarantee(T4 == tmp1, "You are reqired to use T4 as the index register for LoongArch !"); + + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); + blt(tmp1, R0, profile_continue); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. + // mdo start + parameters offset + array length - 1 + add_d(mdp, mdp, tmp1); + ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); + Address arg_type(mdp, tmp1, per_arg_scale, type_base); + + // load offset on the stack from the slot for this parameter + alsl_d(AT, tmp1, mdp, per_arg_scale - 1); + ld_d(tmp2, AT, off_base); + + sub_d(tmp2, R0, tmp2); + + // read the parameter from the local area + slli_d(AT, tmp2, Interpreter::stackElementScale()); + ldx_d(tmp2, AT, _locals_register); + + // profile the parameter + profile_obj_type(tmp2, arg_type); + + // go to next parameter + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + blt(R0, tmp1, loop); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { + if (state == atos) { + MacroAssembler::verify_oop(reg); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { +} +#endif // !CC_INTERP + + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. 
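+  // Roughly (a sketch, not the generated code):
+  //   if (JvmtiExport::can_post_interpreter_events() && thread->interp_only_mode())
+  //     InterpreterRuntime::post_method_entry();
+  //   if (DTraceMethodProbes)
+  //     SharedRuntime::dtrace_method_entry(thread, method);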
+ Register tempreg = T0; +#ifndef OPT_THREAD + get_thread(T8); +#else + move(T8, TREG); +#endif + if (JvmtiExport::can_post_interpreter_events()) { + Label L; + ld_w(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, L); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + + { + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + //Rthread, + T8, + //Rmethod); + S3); + } +} + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) { + Register tempreg = T0; +#ifndef OPT_THREAD + get_thread(T8); +#else + move(T8, TREG); +#endif + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label skip; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + // For c++ interpreter the result is always stored at a known location in the frame + // template interpreter will leave it on the top of the stack. + NOT_CC_INTERP(push(state);) + ld_w(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, skip); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(skip); + NOT_CC_INTERP(pop(state)); + } + + { + // Dtrace notification + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + NOT_CC_INTERP(push(state)); + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + //Rthread, Rmethod); + T8, S3); + NOT_CC_INTERP(pop(state)); + } +} + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where) { + assert_different_registers(scratch, AT); + + if (!preloaded) { + ld_w(scratch, counter_addr); + } + addi_w(scratch, scratch, increment); + st_w(scratch, counter_addr); + + li(AT, mask); + andr(scratch, scratch, AT); + + if (cond == Assembler::zero) { + beq(scratch, R0, *where); + } else { + unimplemented(); + } +} diff --git a/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp new file mode 100644 index 00000000000..9113da54ff1 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP +#define CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP + +#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assember with interpreter-specific macros + + +class InterpreterMacroAssembler: public MacroAssembler { +#ifndef CC_INTERP + private: + + Register _locals_register; // register that contains the pointer to the locals + Register _bcp_register; // register that contains the bcp + + protected: + // Interpreter specific version of call_VM_base + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments); + + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, bool verifyoop = true); +#endif // CC_INTERP + + public: + // narrow int return value + void narrow(Register result); + + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} + + void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); + void get_4_byte_integer_at_bcp(Register reg, int offset); + + void load_earlyret_value(TosState state); + +#ifdef CC_INTERP + void save_bcp() { /* not needed in c++ interpreter and harmless */ } + void restore_bcp() { /* not needed in c++ interpreter and harmless */ } + + // Helpers for runtime call arguments/results + void get_method(Register reg); + +#else + + // Interpreter-specific registers + void save_bcp() { + st_d(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); + } + + void restore_bcp() { + ld_d(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); + } + + void restore_locals() { + ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); + } + + // Helpers for runtime call arguments/results + void get_method(Register reg) { + ld_d(reg, FP, frame::interpreter_frame_method_offset * wordSize); + } + + void get_const(Register reg){ + get_method(reg); + ld_d(reg, reg, in_bytes(Method::const_offset())); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); + } + + void get_cpool_and_tags(Register cpool, Register tags) { + get_constant_pool(cpool); + ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); 
+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_method_counters(Register method, Register mcs, Label& skip); + + // load cpool->resolved_references(index); + void load_resolved_reference_at_index(Register result, Register index); + + void pop_ptr( Register r = FSR); + void pop_i( Register r = FSR); + void pop_l( Register r = FSR); + void pop_f(FloatRegister r = FSF); + void pop_d(FloatRegister r = FSF); + + void push_ptr( Register r = FSR); + void push_i( Register r = FSR); + void push_l( Register r = FSR); + void push_f(FloatRegister r = FSF); + void push_d(FloatRegister r = FSF); + + void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } + + void push(Register r ) { ((MacroAssembler*)this)->push(r); } + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + + void empty_expression_stack() { + ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // NULL last_sp until next java call + st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + } + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. + //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + void dispatch_only(TosState state); + void dispatch_only_normal(TosState state); + void dispatch_only_noverify(TosState state); + void dispatch_next(TosState state, int step = 0); + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method, Register temp); + + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. This code is also used for + // exception unwindwing. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. + // In earlyReturn case we only want to skip throwing an exception + // and installing an exception. 
+ void remove_activation(TosState state, Register ret_addr, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); +#endif // CC_INTERP + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + +#ifndef CC_INTERP + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where); + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register scratch2, + bool receiver_can_be_null = false); + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register scratch); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register scratch2); + + // Debugging + // only if +VerifyOops && state == atos + void verify_oop(Register reg, TosState state = atos); + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + + void profile_obj_type(Register obj, const Address& mdo_addr); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); +#endif // !CC_INTERP + + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); +}; + +#endif // CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP diff --git a/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp new file mode 100644 index 00000000000..7f253b2d516 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP + + +// Generation of Interpreter +// + friend class AbstractInterpreterGenerator; + + private: + + address generate_normal_entry(bool synchronized); + address generate_native_entry(bool synchronized); + address generate_abstract_entry(void); + address generate_math_entry(AbstractInterpreter::MethodKind kind); + address generate_empty_entry(void); + address generate_accessor_entry(void); + address generate_Reference_get_entry(); + address generate_CRC32_update_entry(); + address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); + void lock_method(void); + void generate_stack_overflow_check(void); + + void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); + void generate_counter_overflow(Label* do_continue); + +#endif // CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp new file mode 100644 index 00000000000..052eb997e47 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP + +#include "memory/allocation.hpp" + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + unsigned int _num_fp_args; + unsigned int _num_int_args; + int _stack_offset; + + void move(int from_offset, int to_offset); + void box(int from_offset, int to_offset); + void pass_int(); + void pass_long(); + void pass_object(); + void pass_float(); + void pass_double(); + + public: + // Creation + SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); + _num_int_args = (method->is_static() ? 1 : 0); + _num_fp_args = 0; + _stack_offset = 0; + } + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp new file mode 100644 index 00000000000..0c9df4aa711 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.inline.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Implementation of SignatureHandlerGenerator + +void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { + __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); + __ st_d(temp(), to(), to_offset * longSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { + __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); + __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); + + __ maskeqz(temp(), temp(), AT); + __ st_w(temp(), to(), to_offset * wordSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // generate code to handle arguments + iterate(fingerprint); + // return result handler + __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); + // return + __ jr(RA); + + __ flush(); +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ st_w(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + +// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. 
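+// The 64-bit value is therefore accessed through the second of its two slots,
+// which is why the loads below use Interpreter::local_offset_in_bytes(offset() + 1).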
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ st_d(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + if (_num_int_args < Argument::n_register_parameters - 1) { + Register reg = as_Register(++_num_int_args + RA0->encoding()); + if (_num_int_args == 1) { + assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); + __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); + __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ maskeqz(reg, AT, reg); + } + } else { + __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); + __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ maskeqz(temp(), AT, temp()); + __ st_d(temp(), to(), _stack_offset); + _stack_offset += wordSize; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + if (_num_fp_args < Argument::n_float_register_parameters) { + __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); + } else if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ st_w(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + +// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
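+// Like pass_float above, doubles prefer a floating-point argument register
+// (FA0 upward), then fall back to a free integer argument register, and only
+// then spill to the outgoing stack slot tracked by _stack_offset.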
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + if (_num_fp_args < Argument::n_float_register_parameters) { + __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ st_d(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + + +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +class SlowSignatureHandler + : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _int_args; + intptr_t* _fp_args; + intptr_t* _fp_identifiers; + unsigned int _num_int_args; + unsigned int _num_fp_args; + + virtual void pass_int() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_long() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2 * Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_object() + { + intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + _num_int_args++; + } else { + *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + } + } + + virtual void pass_float() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_fp_args < Argument::n_float_register_parameters) { + *_fp_args++ = from_obj; + _num_fp_args++; + } else if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_double() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_fp_args < Argument::n_float_register_parameters) { + *_fp_args++ = from_obj; + *_fp_identifiers |= (1 << _num_fp_args); // mark as double + _num_fp_args++; + } else if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + public: + SlowSignatureHandler(methodHandle method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + + // see TemplateInterpreterGenerator::generate_slow_signature_handler() + _int_args = to - (method->is_static() ? 15 : 16); + _fp_args = to - 8; + _fp_identifiers = to - 9; + *(int*) _fp_identifiers = 0; + _num_int_args = (method->is_static() ? 
1 : 0); + _num_fp_args = 0; + } +}; + + +IRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +IRT_END diff --git a/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp new file mode 100644 index 00000000000..c83afbdaf03 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP + + public: + + // Sentinel placed in the code for interpreter returns so + // that i2c adapters and osr code can recognize an interpreter + // return address and convert the return to a specialized + // block of code to handle compiedl return values and cleaning + // the fpu stack. + static const int return_sentinel; + + static Address::ScaleFactor stackElementScale() { + return Address::times_8; + } + + // Offset from sp (which points to the last stack element) + static int expr_offset_in_bytes(int i) { return stackElementSize * i; } + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreterSize to get the VM to print out the size. + // Max size with JVMTI and TaggedStackInterpreter + const static int InterpreterCodeSize = 168 * 1024; +#endif // CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp new file mode 100644 index 00000000000..5a4f102cfd0 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +address AbstractInterpreterGenerator::generate_slow_signature_handler() { + address entry = __ pc(); + // Rmethod: method + // LVP: pointer to locals + // A3: first stack arg + __ move(A3, SP); + __ addi_d(SP, SP, -18 * wordSize); + __ st_d(RA, SP, 0); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::slow_signature_handler), + Rmethod, LVP, A3); + + // V0: result handler + + // Stack layout: + // ... + // 18 stack arg0 <--- old sp + // 17 floatReg arg7 + // ... + // 10 floatReg arg0 + // 9 float/double identifiers + // 8 IntReg arg7 + // ... + // 2 IntReg arg1 + // 1 aligned slot + // SP: 0 return address + + // Do FP first so we can use A3 as temp + __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers + + for (int i= 0; i < Argument::n_float_register_parameters; i++) { + FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); + Label isdouble, done; + + __ andi(AT, A3, 1 << i); + __ bnez(AT, isdouble); + __ fld_s(floatreg, SP, (10 + i) * wordSize); + __ b(done); + __ bind(isdouble); + __ fld_d(floatreg, SP, (10 + i) * wordSize); + __ bind(done); + } + + // A0 is for env. + // If the mothed is not static, A1 will be corrected in generate_native_entry. 
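+  // The slots read here were filled on the C++ side by
+  // InterpreterRuntime::slow_signature_handler (see SlowSignatureHandler
+  // earlier in this patch): it stores at most
+  // Argument::n_register_parameters - 1 integer arguments, at most
+  // Argument::n_float_register_parameters FP arguments, and an identifier
+  // word whose bit i is set when FA<i> carries a double
+  // (*_fp_identifiers |= (1 << _num_fp_args) in pass_double()).
+  // That bit is what the andi/bnez pair in the FP loop above tests; the
+  // loop below simply reloads A1..A7 from slots 2..8 of the layout shown.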
+ for (int i= 1; i < Argument::n_register_parameters; i++) { + Register reg = as_Register(i + A0->encoding()); + + __ ld_d(reg, SP, (1 + i) * wordSize); + } + + // A0/V0 contains the result from the call of + // InterpreterRuntime::slow_signature_handler so we don't touch it + // here. It will be loaded with the JNIEnv* later. + __ ld_d(RA, SP, 0); + __ addi_d(SP, SP, 18 * wordSize); + __ jr(RA); + return entry; +} + + +// +// Various method entries +// + +address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + + // Rmethod: methodOop + // V0: scratrch + // Rsender: send 's sp + + if (!InlineIntrinsics) return NULL; // Generate a vanilla entry + + address entry_point = __ pc(); + //guarantee(0, "LA not implemented yet"); + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used. + + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: [ lo(arg) ] <-- sp + // [ hi(arg) ] + { + // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are + // java methods. Interpreter::method_kind(...) will select + // this entry point for the corresponding methods in JDK 1.3. + __ fld_d(FA0, SP, 0 * wordSize); + __ fld_d(FA1, SP, 1 * wordSize); + __ push2(RA, FP); + __ addi_d(FP, SP, 2 * wordSize); + + // [ fp ] <-- sp + // [ ra ] + // [ lo ] <-- fp + // [ hi ] + //FIXME, need consider this + switch (kind) { + case Interpreter::java_lang_math_sin : + __ trigfunc('s'); + break; + case Interpreter::java_lang_math_cos : + __ trigfunc('c'); + break; + case Interpreter::java_lang_math_tan : + __ trigfunc('t'); + break; + case Interpreter::java_lang_math_sqrt: + __ fsqrt_d(F0, FA0); + break; + case Interpreter::java_lang_math_abs: + __ fabs_d(F0, FA0); + break; + case Interpreter::java_lang_math_log: + // Store to stack to convert 80bit precision back to 64bits + break; + case Interpreter::java_lang_math_log10: + // Store to stack to convert 80bit precision back to 64bits + break; + case Interpreter::java_lang_math_pow: + break; + case Interpreter::java_lang_math_exp: + break; + + default : + ShouldNotReachHere(); + } + + // must maintain return value in F0:F1 + __ ld_d(RA, FP, (-1) * wordSize); + //FIXME + __ ld_d(FP, FP, (-2) * wordSize); + __ move(SP, Rsender); + __ jr(RA); + } + return entry_point; +} + + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +address InterpreterGenerator::generate_abstract_entry(void) { + + // Rmethod: methodOop + // V0: receiver (unused) + // Rsender : sender 's sp + address entry_point = __ pc(); + + // abstract method entry + // throw exception + // adjust stack to what a normal return would do + __ empty_expression_stack(); + __ restore_bcp(); + __ restore_locals(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + return entry_point; +} + + +// Empty method, generate a very fast return. 
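+// (Only emitted when UseFastEmptyMethods is on: the entry below tests the
+//  safepoint state word and, if a safepoint is pending, branches to a slow
+//  path that is just the normal method entry emitted in place by
+//  generate_normal_entry(false).)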
+ +address InterpreterGenerator::generate_empty_entry(void) { + + // Rmethod: methodOop + // V0: receiver (unused) + // Rsender: sender 's sp, must set sp to this value on return, on LoongArch, now use T0, as it right? + if (!UseFastEmptyMethods) return NULL; + + address entry_point = __ pc(); + //TODO: LA + //guarantee(0, "LA not implemented yet"); + Label slow_path; + __ li(RT0, SafepointSynchronize::address_of_state()); + __ ld_w(AT, RT0, 0); + __ li(RT0, (SafepointSynchronize::_not_synchronized)); + __ bne(AT, RT0,slow_path); + __ move(SP, Rsender); + __ jr(RA); + __ bind(slow_path); + (void) generate_normal_entry(false); + return entry_point; + +} + +void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { + + // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in + // the days we had adapter frames. When we deoptimize a situation where a + // compiled caller calls a compiled caller will have registers it expects + // to survive the call to the callee. If we deoptimize the callee the only + // way we can restore these registers is to have the oldest interpreter + // frame that we create restore these values. That is what this routine + // will accomplish. + + // At the moment we have modified c2 to not have any callee save registers + // so this problem does not exist and this routine is just a place holder. + + assert(f->is_interpreted_frame(), "must be interpreted"); +} diff --git a/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp new file mode 100644 index 00000000000..de97de58044 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + // fence? 
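+    // Clearing sp first keeps the anchor self-consistent for observers such
+    // as the profiler: a frame walk only trusts _last_Java_fp/_last_Java_pc
+    // while _last_Java_sp is non-NULL (compare the ordering comment in
+    // copy() below).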
+ _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + if (_last_Java_sp != src->_last_Java_sp) + _last_Java_sp = NULL; + + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + // Always walkable + bool walkable(void) { return true; } + // Never any thing to do since we are always walkable and can find address of return addresses + void make_walkable(JavaThread* thread) { } + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + +private: + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + // Assert (last_Java_sp == NULL || fp == NULL) + void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } + +#endif // CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp new file mode 100644 index 00000000000..5b52e54e080 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#define BUFFER_SIZE 30*wordSize + +// Instead of issuing lfence for LoadLoad barrier, we create data dependency +// between loads, which is more efficient than lfence. + +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name = NULL; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + Label slow; + + // return pc RA + // jni env A0 + // obj A1 + // jfieldID A2 + + address counter_addr = SafepointSynchronize::safepoint_counter_addr(); + __ li(AT, (long)counter_addr); + __ ld_w(T1, AT, 0); + + // Parameters(A0~A3) should not be modified, since they will be used in slow path + __ andi(AT, T1, 1); + __ bne(AT, R0, slow); + + __ move(T0, A1); + __ clear_jweak_tag(T0); + + __ ld_d(T0, T0, 0); // unbox, *obj + __ srli_d(T2, A2, 2); // offset + __ add_d(T0, T0, T2); + + __ li(AT, (long)counter_addr); + __ ld_w(AT, AT, 0); + __ bne(T1, AT, slow); + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + switch (type) { + case T_BOOLEAN: __ ld_bu (V0, T0, 0); break; + case T_BYTE: __ ld_b (V0, T0, 0); break; + case T_CHAR: __ ld_hu (V0, T0, 0); break; + case T_SHORT: __ ld_h (V0, T0, 0); break; + case T_INT: __ ld_w (V0, T0, 0); break; + case T_LONG: __ ld_d (V0, T0, 0); break; + case T_FLOAT: __ fld_s (F0, T0, 0); break; + case T_DOUBLE: __ fld_d (F0, T0, 0); break; + default: ShouldNotReachHere(); + } + + __ jr(RA); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind (slow); + address slow_case_addr = NULL; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + } + __ jmp(slow_case_addr); + + __ flush (); + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return 
generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_int_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_int_field0(T_DOUBLE); +} diff --git a/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp new file mode 100644 index 00000000000..554ff216ac2 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP + +#include "memory/allocation.hpp" +#include "oops/oop.hpp" +#include "prims/jni.h" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +private: + + // 32bit Helper routines. + static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; + *(jint *)(to ) = from[0]; } + static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } + +public: + // In LoongArch64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] + // is 8 bytes. + // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. 
+ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. + // This error occurs in ReflectInvoke.java + // The parameter of DD(int) should be 4 instead of 0x550000004. + // + // See: [runtime/javaCalls.hpp] + + static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + *(jlong*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + *(jlong*) (to + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + *(jlong*) (to + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 0 + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + *(jdouble*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + *(jdouble*) (to + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + *(jdouble*) (to + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. 
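+  // Worked example of the conventions above (values are illustrative only):
+  //   intptr_t buf[4]; int pos = 0;
+  //   put_int(42, buf, pos);        // whole slot 0 is written, pos -> 1
+  //   put_long((jlong)7, buf, pos); // slots 1 and 2 both hold 7, pos -> 3
+  //   get_int(&buf[0]);             // 42
+  //   get_long(&buf[1]);            // 7 (_JNI_SLOT_OFFSET is 0 here)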
+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/jni_loongarch.h b/hotspot/src/cpu/loongarch/vm/jni_loongarch.h new file mode 100644 index 00000000000..eb25cbc3546 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/jni_loongarch.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef _JAVASOFT_JNI_MD_H_ +#define _JAVASOFT_JNI_MD_H_ + +// Note: please do not change these without also changing jni_md.h in the JDK +// repository +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) + #define JNIEXPORT __attribute__((visibility("default"))) + #define JNIIMPORT __attribute__((visibility("default"))) +#else + #define JNIEXPORT + #define JNIIMPORT +#endif + +#define JNICALL + +typedef int jint; + + typedef long jlong; + +typedef signed char jbyte; + +#endif diff --git a/hotspot/src/cpu/loongarch/vm/loongarch.ad b/hotspot/src/cpu/loongarch/vm/loongarch.ad new file mode 100644 index 00000000000..48c44779e71 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/loongarch.ad @@ -0,0 +1,24 @@ +// +// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
diff --git a/hotspot/src/cpu/loongarch/vm/loongarch_64.ad b/hotspot/src/cpu/loongarch/vm/loongarch_64.ad
new file mode 100644
index 00000000000..fa4bf6e1703
--- /dev/null
+++ b/hotspot/src/cpu/loongarch/vm/loongarch_64.ad
@@ -0,0 +1,12861 @@
+//
+// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// GodSon3 Architecture Description File
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+ +// format: +// reg_def name (call convention, c-call convention, ideal type, encoding); +// call convention : +// NS = No-Save +// SOC = Save-On-Call +// SOE = Save-On-Entry +// AS = Always-Save +// ideal type : +// see opto/opcodes.hpp for more info +// reg_class name (reg, ...); +// alloc_class name (reg, ...); +register %{ + +// General Registers +// Integer Registers + reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); + reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); + reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); + // TODO: LA + reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); + reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); + reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); + reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); + reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); + reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); + reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); + reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); + reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); + reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); + reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); + reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); + reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); + reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); + reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); + reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); + reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); + reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); + reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); + reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); + reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); + reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); + reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); + reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); + reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); + reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); + reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); + reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); + reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); + reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); + reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); + reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); + reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); + reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); + reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); + reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); + reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); + reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); + reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); + reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); + reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); + reg_def FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); + reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); + reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); + reg_def S1 (SOC, SOE, Op_RegI, 24, S1->as_VMReg()); + reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); + reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); + reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); + reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); + reg_def S3_H (SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); + reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); + reg_def S4_H 
(SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); + reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); + reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); + reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); + reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); + reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); + reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); + // TODO: LA + reg_def S8 ( NS, NS, Op_RegI, 31, S8->as_VMReg()); + reg_def S8_H ( NS, NS, Op_RegI, 31, S8->as_VMReg()->next()); + + +// Floating/Vector registers. +reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); +reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); +reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); +reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); +reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); +reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); +reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); +reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); + +reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); +reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); +reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); +reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); +reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); +reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); +reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); +reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); + +reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); +reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); +reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); +reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); +reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); +reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); +reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); +reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); + +reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); +reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); +reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); +reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); +reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); +reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); +reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); +reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); + +reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); +reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); +reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); +reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); +reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); +reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); +reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); +reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); + +reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); +reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); +reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); +reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); +reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); +reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); +reg_def F5_N ( SOC, SOC, Op_RegF, 5, 
F5->as_VMReg()->next(6) ); +reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); + +reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); +reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); +reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); +reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); +reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); +reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); +reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); +reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); + +reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); +reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); +reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); +reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); +reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); +reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); +reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); +reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); + +reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); +reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); +reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); +reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); +reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); +reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); +reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); +reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); + +reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); +reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); +reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); +reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); +reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); +reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); +reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); +reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); + +reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); +reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); +reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); +reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); +reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); +reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); +reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); +reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); + +reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); +reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); +reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); +reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); +reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); +reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); +reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); +reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); + +reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); +reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); +reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); +reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); +reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); +reg_def F12_M ( SOC, 
SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); +reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); +reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); + +reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); +reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); +reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); +reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); +reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); +reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); +reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); +reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); + +reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); +reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); +reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); +reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); +reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); +reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); +reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); +reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); + +reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); +reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); +reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); +reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); +reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); +reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); +reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); +reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); + +reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); +reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); +reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); +reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); +reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); +reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); +reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); +reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); + +reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); +reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); +reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); +reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); +reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); +reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); +reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); +reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); + +reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); +reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); +reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); +reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); +reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); +reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); +reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); +reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); + +reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); +reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); +reg_def F19_J ( SOC, SOC, Op_RegF, 19, 
F19->as_VMReg()->next(2) ); +reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); +reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); +reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); +reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); +reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); + +reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); +reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); +reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); +reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); +reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); +reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); +reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); +reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); + +reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); +reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); +reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); +reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); +reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); +reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); +reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); +reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); + +reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); +reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); +reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); +reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); +reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); +reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); +reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); +reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); + +reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); +reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); +reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); +reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); +reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); +reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); +reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); +reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); + +reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg() ); +reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next() ); +reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); +reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); +reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); +reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); +reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); +reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); + +reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg() ); +reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next() ); +reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); +reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); +reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); +reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); +reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); +reg_def F25_O ( SOC, SOC, Op_RegF, 25, 
F25->as_VMReg()->next(7) ); + +reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg() ); +reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next() ); +reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); +reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); +reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); +reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); +reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); +reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); + +reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg() ); +reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next() ); +reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); +reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); +reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); +reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); +reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); +reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); + +reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg() ); +reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next() ); +reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); +reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); +reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); +reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); +reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); +reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); + +reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg() ); +reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next() ); +reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); +reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); +reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); +reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); +reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); +reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); + +reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg() ); +reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next() ); +reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); +reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); +reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); +reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); +reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); +reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); + +reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg() ); +reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next() ); +reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); +reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); +reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); +reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); +reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); +reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); + + +// ---------------------------- +// Special Registers +//S6 is used for get_thread(S6) +//S5 is uesd for heapbase of compressed oop +alloc_class chunk0( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S5, S5_H, + S6, S6_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T8, T8_H, + T4, T4_H, + T1, T1_H, // 
inline_cache_reg + T6, T6_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + T5, T5_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + S8, S8_H + RA, RA_H, + SP, SP_H, // stack_pointer + FP, FP_H // frame_pointer + ); + +// F23 is scratch reg +alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, + F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, + F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, + F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, + F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, + F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, + F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, + F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, + F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, + F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, + F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, + F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, + F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, + F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, + F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, + F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, + F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, + F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, + F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, + F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, + F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, + F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, + F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, + F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, + F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, + F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, + F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, + F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, + F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, + F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, + F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); + +reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); +reg_class s0_reg( S0 ); +reg_class s1_reg( S1 ); +reg_class s2_reg( S2 ); +reg_class s3_reg( S3 ); +reg_class s4_reg( S4 ); +reg_class s5_reg( S5 ); +reg_class s6_reg( S6 ); +reg_class s7_reg( S7 ); + +reg_class t_reg( T0, T1, T2, T3, T8, T4 ); +reg_class t0_reg( T0 ); +reg_class t1_reg( T1 ); +reg_class t2_reg( T2 ); +reg_class t3_reg( T3 ); +reg_class t8_reg( T8 ); +reg_class t4_reg( T4 ); + +reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); +reg_class a0_reg( A0 ); +reg_class a1_reg( A1 ); +reg_class a2_reg( A2 ); +reg_class a3_reg( A3 ); +reg_class a4_reg( A4 ); +reg_class a5_reg( A5 ); +reg_class a6_reg( A6 ); +reg_class a7_reg( A7 ); + +// TODO: LA +//reg_class v0_reg( A0 ); +//reg_class v1_reg( A1 ); + +reg_class sp_reg( SP, SP_H ); +reg_class fp_reg( FP, FP_H ); + +reg_class v0_long_reg( A0, A0_H ); +reg_class v1_long_reg( A1, A1_H ); +reg_class a0_long_reg( A0, A0_H ); +reg_class a1_long_reg( A1, A1_H ); +reg_class a2_long_reg( A2, A2_H ); +reg_class a3_long_reg( A3, A3_H ); +reg_class a4_long_reg( A4, A4_H ); +reg_class a5_long_reg( A5, A5_H ); +reg_class a6_long_reg( A6, A6_H ); +reg_class a7_long_reg( A7, A7_H ); +reg_class t0_long_reg( T0, T0_H ); +reg_class t1_long_reg( T1, T1_H ); +reg_class t2_long_reg( T2, T2_H ); +reg_class t3_long_reg( T3, T3_H ); +reg_class t8_long_reg( T8, T8_H ); +reg_class t4_long_reg( T4, T4_H ); +reg_class s0_long_reg( S0, S0_H ); +reg_class s1_long_reg( S1, S1_H ); +reg_class s2_long_reg( S2, S2_H ); +reg_class s3_long_reg( S3, S3_H ); +reg_class s4_long_reg( S4, S4_H ); +reg_class s5_long_reg( 
S5, S5_H ); +reg_class s6_long_reg( S6, S6_H ); +reg_class s7_long_reg( S7, S7_H ); + +reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, A4, T5, A3, A2, A1, A0, T0 ); + +reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); + +reg_class p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T8, T8_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class no_T8_p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class no_Ax_p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + T0, T0_H + ); + +reg_class long_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T8, T8_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + + +// Floating point registers. +// F31 are not used as temporary registers in D2I +reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); +reg_class dbl_reg( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F12, F12_H, + F13, F13_H, + F14, F14_H, + F15, F15_H, + F16, F16_H, + F17, F17_H, + F18, F18_H, + F19, F19_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F29, F29_H, + F30, F30_H, + F31, F31_H); + +// Class for all 128bit vector registers +reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, + F1, F1_H, F1_J, F1_K, + F2, F2_H, F2_J, F2_K, + F3, F3_H, F3_J, F3_K, + F4, F4_H, F4_J, F4_K, + F5, F5_H, F5_J, F5_K, + F6, F6_H, F6_J, F6_K, + F7, F7_H, F7_J, F7_K, + F8, F8_H, F8_J, F8_K, + F9, F9_H, F9_J, F9_K, + F10, F10_H, F10_J, F10_K, + F11, F11_H, F11_J, F11_K, + F12, F12_H, F12_J, F12_K, + F13, F13_H, F13_J, F13_K, + F14, F14_H, F14_J, F14_K, + F15, F15_H, F15_J, F15_K, + F16, F16_H, F16_J, F16_K, + F17, F17_H, F17_J, F17_K, + F18, F18_H, F18_J, F18_K, + F19, F19_H, F19_J, F19_K, + F20, F20_H, F20_J, F20_K, + F21, F21_H, F21_J, F21_K, + F22, F22_H, F22_J, F22_K, + F24, F24_H, F24_J, F24_K, + F25, F25_H, F25_J, F25_K, + F26, F26_H, F26_J, F26_K, + F27, F27_H, F27_J, F27_K, + F28, F28_H, F28_J, F28_K, + F29, F29_H, F29_J, F29_K, + F30, F30_H, F30_J, F30_K, + F31, F31_H, F31_J, F31_K); + +// Class for all 256bit vector registers +reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, + F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, + F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, + F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, + F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, + F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, + F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, + F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, + F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, + F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, + F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, + F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, + F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, + F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, + F14, F14_H, F14_J, F14_K, 
F14_L, F14_M, F14_N, F14_O, + F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, + F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, + F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, + F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, + F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, + F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, + F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, + F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, + F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, + F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, + F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, + F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, + F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, + F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, + F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, + F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); + +// TODO: LA +//reg_class flt_arg0( F0 ); +//reg_class dbl_arg0( F0, F0_H ); +//reg_class dbl_arg1( F1, F1_H ); + +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define name --> value mappings to inform the ADLC of an integer valued name +// Current support includes integer values in the range [0, 0x7FFFFFFF] +// Format: +// int_def ( , ); +// Generated Code in ad_.hpp +// #define () +// // value == +// Generated code in ad_.cpp adlc_verification() +// assert( == , "Expect () to equal "); +// +definitions %{ + int_def DEFAULT_COST ( 100, 100); + int_def HUGE_COST (1000000, 1000000); + + // Memory refs are twice as expensive as run-of-the-mill. + int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); + + // Branches are even more expensive. + int_def BRANCH_COST ( 300, DEFAULT_COST * 3); + // we use jr instruction to construct call, so more expensive + int_def CALL_COST ( 500, DEFAULT_COST * 5); +/* + int_def EQUAL ( 1, 1 ); + int_def NOT_EQUAL ( 2, 2 ); + int_def GREATER ( 3, 3 ); + int_def GREATER_EQUAL ( 4, 4 ); + int_def LESS ( 5, 5 ); + int_def LESS_EQUAL ( 6, 6 ); +*/ +%} + + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, functions, and +// definitions necessary in the rest of the architecture description + +source_hpp %{ +// Header information of the source block. +// Method declarations/definitions which are used outside +// the ad-scope can conveniently be defined here. +// +// To keep related declarations/definitions/uses close together, +// we switch between source %{ }% and source_hpp %{ }% freely as needed. + +class CallStubImpl { + + //-------------------------------------------------------------- + //---< Used for optimization in Compile::shorten_branches >--- + //-------------------------------------------------------------- + + public: + // Size of call trampoline stub. + static uint size_call_trampoline() { + return 0; // no call trampolines on this platform + } + + // number of relocations needed by a call trampoline stub + static uint reloc_call_trampoline() { + return 0; // no call trampolines on this platform + } +}; + +class HandlerImpl { + + public: + + static int emit_exception_handler(CodeBuffer &cbuf); + static int emit_deopt_handler(CodeBuffer& cbuf); + + static uint size_exception_handler() { + // NativeCall instruction size is the same as NativeJump. + // exception handler starts out as jump and can be patched to + // a call be deoptimization. 
(4932387) + // Note that this value is also credited (in output.cpp) to + // the size of the code section. + int size = NativeFarCall::instruction_size; + return round_to(size, 16); + } + + static uint size_deopt_handler() { + int size = NativeFarCall::instruction_size; + return round_to(size, 16); + } +}; + +%} // end source_hpp + +source %{ + +#define NO_INDEX 0 +#define RELOC_IMM64 Assembler::imm_operand +#define RELOC_DISP32 Assembler::disp32_operand + +#define V0_num A0_num +#define V0_H_num A0_H_num + +#define __ _masm. + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Emit exception handler code. +// Stuff framesize into a register and call a VM stub routine. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_exception_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + +// Emit deopt handler code. +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_deopt_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call(SharedRuntime::deopt_blob()->unpack()); + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + return true; // Per default match rules are supported. +} + +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + const int safety_zone = 3 * BytesPerInstWord; + int offs = offset - br_size + 4; + // To be conservative on LoongArch + // branch node should be end with: + // branch inst + offs = (offs < 0 ? 
offs - safety_zone : offs + safety_zone) >> 2; + switch (rule) { + case jmpDir_long_rule: + case jmpDir_short_rule: + return Assembler::is_simm(offs, 26); + case jmpCon_flags_long_rule: + case jmpCon_flags_short_rule: + case branchConP_0_long_rule: + case branchConP_0_short_rule: + case branchConN2P_0_long_rule: + case branchConN2P_0_short_rule: + case cmpN_null_branch_long_rule: + case cmpN_null_branch_short_rule: + case branchConIU_reg_immI_0_long_rule: + case branchConIU_reg_immI_0_short_rule: + case branchConF_reg_reg_long_rule: + case branchConF_reg_reg_short_rule: + case branchConD_reg_reg_long_rule: + case branchConD_reg_reg_short_rule: + return Assembler::is_simm(offs, 21); + default: + return Assembler::is_simm(offs, 16); + } + return false; +} + + +// No additional cost for CMOVL. +const int Matcher::long_cmove_cost() { return 0; } + +// No CMOVF/CMOVD with SSE2 +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } + +// Does the CPU require late expand (see block.cpp for description of late expand)? +const bool Matcher::require_postalloc_expand = false; + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? True for Intel but false for most RISCs +const bool Matcher::clone_shift_expressions = false; + +// Do we need to mask the count passed to shift instructions or does +// the cpu only look at the lower 5/6 bits anyway? +const bool Matcher::need_masked_shift_count = false; + +bool Matcher::narrow_oop_use_complex_address() { + assert(UseCompressedOops, "only for compressed oops code"); + return false; +} + +bool Matcher::narrow_klass_use_complex_address() { + assert(UseCompressedClassPointers, "only for compressed klass code"); + return false; +} + +// This is UltraSparc specific, true just means we have fast l2f conversion +const bool Matcher::convL2FSupported(void) { + return true; +} + +// Vector ideal reg +const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); + switch(size) { + case 16: return Op_VecX; + case 32: return Op_VecY; + } + ShouldNotReachHere(); + return 0; +} + +// Only lowest bits of xmm reg are used for vector shift count. +const uint Matcher::vector_shift_count_ideal_reg(int size) { + assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); + switch(size) { + case 16: return Op_VecX; + case 32: return Op_VecY; + } + ShouldNotReachHere(); + return 0; +} + +// Max vector size in bytes. 0 if not supported. +const int Matcher::vector_width_in_bytes(BasicType bt) { + return (int)MaxVectorSize; +} + +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + int max_size = max_vector_size(bt); + int size = 0; + + if (UseLSX) size = 16; + size = size / type2aelembytes(bt); + return MIN2(size,max_size); +} + +// LoongArch supports misaligned vectors store/load? 
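The Matcher::is_short_branch_offset check above converts the byte displacement into a word (instruction) offset and pads it with a three-instruction safety zone before testing it against the signed immediate field of the branch encoding: 26 bits for plain jumps, 21 bits for the compare-and-branch forms, and 16 bits otherwise. The following standalone sketch only illustrates that arithmetic; is_simm here is a local stand-in for Assembler::is_simm and the sample values are invented.

  #include <cstdint>
  #include <cstdio>

  // Local stand-in mirroring the shape of Assembler::is_simm.
  static bool is_simm(int64_t value, unsigned bits) {
    const int64_t lo = -(int64_t(1) << (bits - 1));
    const int64_t hi =  (int64_t(1) << (bits - 1)) - 1;
    return value >= lo && value <= hi;
  }

  // Conservative reach test for a branch with byte displacement 'offset'
  // and node size 'br_size' (illustrative values only).
  static bool fits_short_branch(int offset, int br_size, unsigned field_bits) {
    const int BytesPerInstWord = 4;
    const int safety_zone = 3 * BytesPerInstWord;
    int offs = offset - br_size + 4;
    // Widen by the safety zone, then convert bytes to a word offset.
    offs = (offs < 0 ? offs - safety_zone : offs + safety_zone) >> 2;
    return is_simm(offs, field_bits);
  }

  int main() {
    // A +1 MiB displacement fits a 26-bit field but not a 16-bit one.
    printf("%d %d\n", fits_short_branch(1 << 20, 4, 26),
                      fits_short_branch(1 << 20, 4, 16));
    return 0;
  }

Keeping the test conservative means a branch is only claimed to be short when it still reaches after small late-stage size changes.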
+const bool Matcher::misaligned_vectors_ok() { + return false; + //return !AlignVector; // can be changed by flag +} + +// Register for DIVI projection of divmodI +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +int Matcher::regnum_to_fpu_offset(int regnum) { + return regnum - 32; // The FP registers are in the second chunk +} + + +const bool Matcher::isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + return true; +} + + +// Return whether or not this register is ever used as an argument. This +// function is used on startup to build the trampoline stubs in generateOptoStub. +// Registers not mentioned will be killed by the VM call in the trampoline, and +// arguments in those registers not be available to the callee. +bool Matcher::can_be_java_arg( int reg ) { + // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() + if ( reg == T0_num || reg == T0_H_num + || reg == A0_num || reg == A0_H_num + || reg == A1_num || reg == A1_H_num + || reg == A2_num || reg == A2_H_num + || reg == A3_num || reg == A3_H_num + || reg == A4_num || reg == A4_H_num + || reg == A5_num || reg == A5_H_num + || reg == A6_num || reg == A6_H_num + || reg == A7_num || reg == A7_H_num ) + return true; + + if ( reg == F0_num || reg == F0_H_num + || reg == F1_num || reg == F1_H_num + || reg == F2_num || reg == F2_H_num + || reg == F3_num || reg == F3_H_num + || reg == F4_num || reg == F4_H_num + || reg == F5_num || reg == F5_H_num + || reg == F6_num || reg == F6_H_num + || reg == F7_num || reg == F7_H_num ) + return true; + + return false; +} + +bool Matcher::is_spillable_arg( int reg ) { + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { + return false; +} + +// Register for MODL projection of divmodL +RegMask Matcher::modL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return FP_REG_mask(); +} + +// LoongArch doesn't support AES intrinsics +const bool Matcher::pass_original_key_for_aes() { + return false; +} + +int CallStaticJavaDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallLeafDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallRuntimeDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +// If CPU can load and store mis-aligned doubles directly then no fixup is +// needed. Else we split the double into 2 integer pieces and move it +// piece-by-piece. Only happens when passing doubles into C code as the +// Java calling convention forces doubles to be aligned. 
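The compute_padding overrides above all share one calculation: round the current code offset up to the node's required alignment and emit the difference as padding. A minimal self-contained illustration of that arithmetic (round_to is a local helper here, not HotSpot's):

  #include <cassert>
  #include <cstdio>

  // Local stand-in: round 'x' up to a power-of-two alignment.
  static int round_to(int x, int alignment) {
    assert((alignment & (alignment - 1)) == 0 && "alignment must be a power of two");
    return (x + alignment - 1) & ~(alignment - 1);
  }

  static int compute_padding(int current_offset, int alignment_required) {
    return round_to(current_offset, alignment_required) - current_offset;
  }

  int main() {
    // A call site at byte offset 20 that must start on a 16-byte boundary
    // needs 12 bytes of padding, i.e. three 4-byte NOPs.
    printf("%d\n", compute_padding(20, 16)); // prints 12
    return 0;
  }

With fixed 4-byte instructions the returned padding is always a whole number of NOPs.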
+const bool Matcher::misaligned_doubles_ok = false;
+// Do floats take an entire double register or just half?
+//const bool Matcher::float_in_double = true;
+bool Matcher::float_in_double() { return false; }
+// Threshold size for cleararray.
+const int Matcher::init_array_short_size = 8 * BytesPerLong;
+// Do ints take an entire long register or just half?
+const bool Matcher::int_in_long = true;
+// Is it better to copy float constants, or load them directly from memory?
+// Intel can load a float constant from a direct address, requiring no
+// extra registers. Most RISCs will have to materialize an address into a
+// register first, so they would do better to copy the constant from stack.
+const bool Matcher::rematerialize_float_constants = false;
+// Advertise here if the CPU requires explicit rounding operations
+// to implement the UseStrictFP mode.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
+// false => size gets scaled to BytesPerLong, ok.
+const bool Matcher::init_array_count_is_in_bytes = false;
+
+// Indicate if the safepoint node needs the polling page as an input.
+// The polling page address is materialized into a register by
+// MachEpilogNode::emit, so the safepoint node does not take it as an input.
+bool SafePointNode::needs_polling_address_input() {
+  return false;
+}
+
+// !!!!! Special hack to get all types of calls to specify the byte offset
+//       from the start of the call to the point where the return address
+//       will point.
+int MachCallStaticJavaNode::ret_addr_offset() {
+  // bl
+  return NativeCall::instruction_size;
+}
+
+int MachCallDynamicJavaNode::ret_addr_offset() {
+  // lu12i_w IC_Klass,
+  // ori IC_Klass,
+  // lu32i_d IC_Klass
+  // lu52i_d IC_Klass
+
+  // bl
+  return NativeMovConstReg::instruction_size + NativeCall::instruction_size;
+}
+
+//=============================================================================
+
+// Figure out which register class each belongs in: rc_int, rc_float, rc_stack
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+static enum RC rc_class(OptoReg::Name reg) {
+  if (!OptoReg::is_valid(reg)) return rc_bad;
+  if (OptoReg::is_stack(reg)) return rc_stack;
+  VMReg r = OptoReg::as_VMReg(reg);
+  if (r->is_Register()) return rc_int;
+  assert(r->is_FloatRegister(), "must be");
+  return rc_float;
+}
+
+// Helper methods for MachSpillCopyNode::implementation().
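The spill-copy helpers that follow first classify each half of the source and destination as stack, integer register, or float register, then dispatch on the (source class, destination class) pair; a 64-bit move is recognized when the two halves form an even-aligned, adjacent pair. Below is a simplified standalone sketch of that classification and pairing test, using an invented register numbering purely for illustration.

  #include <cstdio>

  enum RC { rc_bad, rc_int, rc_float, rc_stack };

  // Toy register numbering for illustration only: 0..31 are GPR halves,
  // 32..95 are FPR halves, anything >= 96 is a stack slot.
  static RC rc_class(int reg) {
    if (reg < 0)   return rc_bad;
    if (reg >= 96) return rc_stack;
    return (reg < 32) ? rc_int : rc_float;
  }

  // A 64-bit value occupies an even-aligned, adjacent pair of slots.
  static bool is_64bit_pair(int first, int second) {
    return (first & 1) == 0 && first + 1 == second;
  }

  int main() {
    printf("src class %d, dst class %d, 64-bit? %d\n",
           rc_class(4), rc_class(100), is_64bit_pair(4, 5));
    return 0;
  }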
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, + int src_hi, int dst_hi, uint ireg, outputStream* st) { + int size = 0; + if (cbuf) { + MacroAssembler _masm(cbuf); + int offset = __ offset(); + switch (ireg) { + case Op_VecX: + __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); + break; + case Op_VecY: + __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); + break; + default: + ShouldNotReachHere(); + } +#ifndef PRODUCT + } else if (!do_size) { + switch (ireg) { + case Op_VecX: + st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + break; + case Op_VecY: + st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + break; + default: + ShouldNotReachHere(); + } +#endif + } + size += 4; + return size; +} + +static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st) { + int size = 0; + if (cbuf) { + MacroAssembler _masm(cbuf); + int offset = __ offset(); + if (is_load) { + switch (ireg) { + case Op_VecX: + __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + case Op_VecY: + __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + default: + ShouldNotReachHere(); + } + } else { // store + switch (ireg) { + case Op_VecX: + __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + case Op_VecY: + __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + default: + ShouldNotReachHere(); + } + } +#ifndef PRODUCT + } else if (!do_size) { + if (is_load) { + switch (ireg) { + case Op_VecX: + st->print("vld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecY: + st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + default: + ShouldNotReachHere(); + } + } else { // store + switch (ireg) { + case Op_VecX: + st->print("vst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecY: + st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + default: + ShouldNotReachHere(); + } + } +#endif + } + size += 4; + return size; +} + +static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, + int dst_offset, uint ireg, outputStream* st) { + int size = 0; + if (cbuf) { + MacroAssembler _masm(cbuf); + switch (ireg) { + case Op_VecX: + __ vld(F23, SP, src_offset); + __ vst(F23, SP, dst_offset); + break; + case Op_VecY: + __ xvld(F23, SP, src_offset); + __ xvst(F23, SP, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#ifndef PRODUCT + } else { + switch (ireg) { + case Op_VecX: + st->print("vld f23, %d(sp)\n\t" + "vst f23, %d(sp)\t# 128-bit mem-mem spill", + src_offset, dst_offset); + break; + case Op_VecY: + st->print("xvld f23, %d(sp)\n\t" + "xvst f23, %d(sp)\t# 256-bit mem-mem spill", + src_offset, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#endif + } + size += 8; + return size; +} + +uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { + // Get registers to move + OptoReg::Name src_second = ra_->get_reg_second(in(1)); + OptoReg::Name src_first = ra_->get_reg_first(in(1)); + OptoReg::Name dst_second = ra_->get_reg_second(this ); + OptoReg::Name dst_first = ra_->get_reg_first(this ); + 
+ enum RC src_second_rc = rc_class(src_second); + enum RC src_first_rc = rc_class(src_first); + enum RC dst_second_rc = rc_class(dst_second); + enum RC dst_first_rc = rc_class(dst_first); + + assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); + + // Generate spill code! + + if( src_first == dst_first && src_second == dst_second ) + return 0; // Self copy, no move + + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); + if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { + // mem -> mem + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { + vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { + int stack_offset = ra_->reg2offset(dst_first); + vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); + } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { + int stack_offset = ra_->reg2offset(src_first); + vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); + } else { + ShouldNotReachHere(); + } + return 0; + } + + if (src_first_rc == rc_stack) { + // mem -> + if (dst_first_rc == rc_stack) { + // mem -> mem + assert(src_second != dst_first, "overlap"); + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld_d(AT, Address(SP, src_offset)); + __ st_d(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("ld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" + "st_d AT, [SP + #%d]", + src_offset, dst_offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + // No pushl/popl, so: + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld_w(AT, Address(SP, src_offset)); + __ st_w(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("ld_w AT, [SP + #%d] spill 2\n\t" + "st_w AT, [SP + #%d]\n\t", + src_offset, dst_offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // mem -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("ld_d %s, [SP + #%d]\t# spill 3", + Matcher::regName[dst_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ ld_w(as_Register(Matcher::_regEncode[dst_first]), 
Address(SP, offset)); + else { + if (Assembler::is_simm(offset, 12)) { + __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); + } else { + __ li(AT, offset); + __ ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); + } + } +#ifndef PRODUCT + } else { + st->print("\n\t"); + if (this->ideal_reg() == Op_RegI) + st->print("ld_w %s, [SP + #%d]\t# spill 4", + Matcher::regName[dst_first], + offset); + else + st->print("ld_wu %s, [SP + #%d]\t# spill 5", + Matcher::regName[dst_first], + offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_float) { + // mem-> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fld_d %s, [SP + #%d]\t# spill 6", + Matcher::regName[dst_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fld_s %s, [SP + #%d]\t# spill 7", + Matcher::regName[dst_first], + offset); +#endif + } + } + } + return 0; + } else if (src_first_rc == rc_int) { + // gpr -> + if (dst_first_rc == rc_stack) { + // gpr -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("st_d %s, [SP + #%d] # spill 8", + Matcher::regName[src_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("st_w %s, [SP + #%d]\t# spill 9", + Matcher::regName[src_first], offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // gpr -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ move(as_Register(Matcher::_regEncode[dst_first]), + as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("move(64bit) %s <-- %s\t# spill 10", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + return 0; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); + else + __ 
add_d(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("move(32-bit) %s <-- %s\t# spill 11", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + return 0; + } + } else if (dst_first_rc == rc_float) { + // gpr -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movgr2fr_d %s, %s\t# spill 12", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movgr2fr_w %s, %s\t# spill 13", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } + } else if (src_first_rc == rc_float) { + // xmm -> + if (dst_first_rc == rc_stack) { + // xmm -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fst_d %s, [SP + #%d]\t# spill 14", + Matcher::regName[src_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fst_s %s, [SP + #%d]\t# spill 15", + Matcher::regName[src_first], + offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // xmm -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movfr2gr_d %s, %s\t# spill 16", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movfr2gr_s %s, %s\t# spill 17", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_float) { + // xmm -> xmm + if ((src_first & 1) == 0 && src_first + 
1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fmov_d %s <-- %s\t# spill 18", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fmov_s %s <-- %s\t# spill 19", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } + } + + assert(0," foo "); + Unimplemented(); + return 0; +} + +#ifndef PRODUCT +void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + implementation( NULL, ra_, false, st ); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation( &cbuf, ra_, false, NULL ); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= +# + +#ifndef PRODUCT +void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("BRK"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { + MacroAssembler _masm(&cbuf); + __ brk(5); +} + +uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { + return MachNode::size(ra_); +} + + +//============================================================================= +#ifndef PRODUCT +void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile *C = ra_->C; + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); + st->print("\t"); + st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); + st->print("\t"); + st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); + if( do_polling() && C->is_method_compilation() ) { + st->print("\t"); + st->print_cr("Poll Safepoint # MachEpilogNode"); + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile *C = ra_->C; + MacroAssembler _masm(&cbuf); + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + __ ld_d(RA, Address(SP, framesize - wordSize)); + __ ld_d(FP, Address(SP, framesize - wordSize * 2)); + if (Assembler::is_simm(framesize, 12)) { + __ addi_d(SP, SP, framesize); + } else { + __ li(AT, framesize); + __ add_d(SP, SP, AT); + } + + if( do_polling() && C->is_method_compilation() ) { + __ li(AT, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_return_type); + __ ld_w(AT, AT, 0); + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachEpilogNode::reloc() const { + return 0; // a large enough number +} + +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +int 
MachEpilogNode::safepoint_offset() const { return 0; } + +//============================================================================= + +#ifndef PRODUCT +void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("ADDI_D %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); +} +#endif + + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + + if (Assembler::is_simm(offset, 12)) + return 4; + else + return 3 * 4; +} + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + + if (Assembler::is_simm(offset, 12)) { + __ addi_d(as_Register(reg), SP, offset); + } else { + __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); + __ ori(AT, AT, Assembler::split_low12(offset)); + __ add_d(as_Register(reg), SP, AT); + } +} + +int MachCallRuntimeNode::ret_addr_offset() { + // pcaddu18i + // jirl + return NativeFarCall::instruction_size; +} + + +//============================================================================= +#ifndef PRODUCT +void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { + MacroAssembler _masm(&cbuf); + int i = 0; + for(i = 0; i < _count; i++) + __ nop(); +} + +uint MachNopNode::size(PhaseRegAlloc *) const { + return 4 * _count; +} +const Pipeline* MachNopNode::pipeline() const { + return MachNode::pipeline_class(); +} + +//============================================================================= + +//============================================================================= +#ifndef PRODUCT +void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + st->print_cr("load_klass(T4, T0)"); + st->print_cr("\tbeq(T4, iCache, L)"); + st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); + st->print_cr(" L:"); +} +#endif + + +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int ic_reg = Matcher::inline_cache_reg_encode(); + Label L; + Register receiver = T0; + Register iCache = as_Register(ic_reg); + + __ load_klass(T4, receiver); + __ beq(T4, iCache, L); + __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); + __ bind(L); +} + +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + + + +//============================================================================= + +const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); + +int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + Compile* C = ra_->C; + Compile::ConstantTable& constant_table = C->constant_table(); + MacroAssembler _masm(&cbuf); + + Register Rtoc = as_Register(ra_->get_encode(this)); + CodeSection* consts_section = cbuf.consts(); + int consts_size = consts_section->align_at_start(consts_section->size()); + 
assert(constant_table.size() == consts_size, "must be equal"); + + if (consts_section->size()) { + assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS, + "insts must be immediately follow consts"); + // Materialize the constant table base. + address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset()); + jint offs = (baseaddr - __ pc()) >> 2; + guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset"); + __ pcaddi(Rtoc, offs); + } +} + +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { + // pcaddi + return 1 * BytesPerInstWord; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + Register r = as_Register(ra_->get_encode(this)); + st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); +} +#endif + + +//============================================================================= +#ifndef PRODUCT +void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile* C = ra_->C; + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(bangsize)) { + st->print_cr("# stack bang"); st->print("\t"); + } + st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); + st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); + st->print("addi_d FP, SP, -%d \n\t", wordSize*2); + st->print("addi_d SP, SP, -%d \t",framesize); +} +#endif + + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + +#ifdef ASSERT + address start = __ pc(); +#endif + + if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + + if (Assembler::is_simm(-framesize, 12)) { + __ addi_d(SP, SP, -framesize); + } else { + __ li(AT, -framesize); + __ add_d(SP, SP, AT); + } + __ st_d(RA, Address(SP, framesize - wordSize)); + __ st_d(FP, Address(SP, framesize - wordSize * 2)); + if (Assembler::is_simm(framesize - wordSize * 2, 12)) { + __ addi_d(FP, SP, framesize - wordSize * 2); + } else { + __ li(AT, framesize - wordSize * 2); + __ add_d(FP, SP, AT); + } + + assert((__ pc() - start) >= 1 * BytesPerInstWord, "No enough room for patch_verified_entry"); + + C->set_frame_complete(cbuf.insts_size()); + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. 
+ Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + + +uint MachPrologNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachPrologNode::reloc() const { + return 0; // a large enough number +} + +%} + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to output +// byte streams. Encoding classes generate functions which are called by +// Machine Instruction Nodes in order to generate the bit encoding of the +// instruction. Operands specify their base encoding interface with the +// interface keyword. There are currently supported four interfaces, +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an +// operand to generate a function which returns its register number when +// queried. CONST_INTER causes an operand to generate a function which +// returns the value of the constant when queried. MEMORY_INTER causes an +// operand to generate four functions which return the Base Register, the +// Index Register, the Scale Value, and the Offset Value of the operand when +// queried. COND_INTER causes an operand to generate six functions which +// return the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional instruction. +// Instructions specify two basic values for encoding. They use the +// ins_encode keyword to specify their encoding class (which must be one of +// the class names specified in the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular instruction +// needs for encoding need to be specified. +encode %{ + + enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf + MacroAssembler _masm(&cbuf); + // This is the instruction starting address for relocation info. + __ block_comment("Java_To_Runtime"); + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call((address)$meth$$method); + %} + + enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine + // who we intended to call. + MacroAssembler _masm(&cbuf); + address addr = (address)$meth$$method; + address call; + __ block_comment("Java_Static_Call"); + + if ( !_method ) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 
+ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); + } else if(_optimized_virtual) { + call = __ trampoline_call(AddressLiteral(addr, relocInfo::opt_virtual_call_type), &cbuf); + } else { + call = __ trampoline_call(AddressLiteral(addr, relocInfo::static_call_type), &cbuf); + } + + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + + if( _method ) { // Emit stub for static call + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + + // + // [Ref: LIR_Assembler::ic_call() ] + // + enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL + MacroAssembler _masm(&cbuf); + __ block_comment("Java_Dynamic_Call"); + address call = __ ic_call((address)$meth$$method); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + %} + + + enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ + Register result = $result$$Register; + Register sub = $sub$$Register; + Register super = $super$$Register; + Register length = $tmp$$Register; + Register tmp = T4; + Label miss; + + // result may be the same as sub + // 47c B40: # B21 B41 <- B20 Freq: 0.155379 + // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 + // 4bc mov S2, NULL #@loadConP + // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 + // + MacroAssembler _masm(&cbuf); + Label done; + __ check_klass_subtype_slow_path(sub, super, length, tmp, + NULL, &miss, + /*set_cond_codes:*/ true); + // Refer to X86_64's RDI + __ move(result, 0); + __ b(done); + + __ bind(miss); + __ li(result, 1); + __ bind(done); + %} + +%} + + +//---------LOONGARCH FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. +// +// S T A C K L A Y O U T Allocators stack-slot number +// | (to get allocators register number +// G Owned by | | v add SharedInfo::stack0) +// r CALLER | | +// o | +--------+ pad to even-align allocators stack-slot +// w V | pad0 | numbers; owned by CALLER +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | old | | 3 +// | | SP-+--------+----> Matcher::_old_SP, even aligned +// v | | ret | 3 return address +// Owned by +--------+ +// Self | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> SharedInfo::stack0, even aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by new | | +// Callee SP-+--------+----> Matcher::_new_SP, even aligned +// | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be nessecary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. 
Holes in the outgoing arguments may be nessecary for +// varargs C calling conventions. +// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. +// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack +// alignment. Region 11, pad1, may be dynamically extended so that +// SP meets the minimum alignment. + + +frame %{ + + stack_direction(TOWARDS_LOW); + + // These two registers define part of the calling convention + // between compiled code and the interpreter. + // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention + // for more information. + + inline_cache_reg(T1); // Inline Cache Register + interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter + + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset32); + + // Number of stack slots consumed by locking an object + // generate Compile::sync_stack_slots + sync_stack_slots(2); + + frame_pointer(SP); + + // Interpreter stores its frame pointer in a register which is + // stored to the stack by I2CAdaptors. + // I2CAdaptors convert from interpreted java to compiled java. + + interpreter_frame_pointer(FP); + + // generate Matcher::stack_alignment + stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); + + // Number of stack slots between incoming argument block and the start of + // a new frame. The PROLOG must add this many slots to the stack. The + // EPILOG must remove this many slots. + in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(0); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. + // Otherwise, it is above the locks and verification slot and alignment word + //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); + return_addr(REG RA); + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) + // StartNode::calling_convention call this. + calling_convention %{ + SharedRuntime::java_calling_convention(sig_bt, regs, length, false); + %} + + + + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. 
a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // SEE CallRuntimeNode::calling_convention for more information. + c_calling_convention %{ + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); + %} + + + // Location of C & interpreter return values + // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. + // SEE Matcher::match. + c_return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + + // Location of return values + // register(s) contain(s) return value for Op_StartC2I and Op_Start. + // SEE Matcher::match. + + return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(0); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(100); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_pc_relative(0); // Required PC Relative flag +ins_attrib ins_short_branch(0); // Required flag: is this instruction a + // non-matching short branch variant of some + // long branch? +ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) + // specifies the alignment that some part of the instruction (not + // necessarily the start) requires. If > 1, a compute_padding() + // function must be provided for the instruction + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +// Vectors + +operand vecX() %{ + constraint(ALLOC_IN_RC(vectorx_reg)); + match(VecX); + + format %{ %} + interface(REG_INTER); +%} + +operand vecY() %{ + constraint(ALLOC_IN_RC(vectory_reg)); + match(VecY); + + format %{ %} + interface(REG_INTER); +%} + +// Flags register, used as output of compare instructions +operand FlagsReg() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegFlags); + + format %{ "T0" %} + interface(REG_INTER); +%} + +//----------Simple Operands---------------------------------------------------- +// TODO: Should we need to define some more special immediate number ? 
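Each immediate operand defined below is a ConI/ConL match guarded by a range predicate, so the matcher only selects an immediate form when the constant fits the instruction's field (for example 0..31 for 5-bit shift counts, -2048..2047 for 12-bit displacements). The following standalone sketch shows the corresponding range checks; the helpers are local stand-ins, not HotSpot's Assembler methods.

  #include <cstdint>
  #include <cstdio>

  // Local stand-ins mirroring the ranges used by the operand predicates below.
  static bool is_uimm(int64_t v, unsigned bits) {
    return v >= 0 && v < (int64_t(1) << bits);
  }
  static bool is_simm(int64_t v, unsigned bits) {
    return v >= -(int64_t(1) << (bits - 1)) && v < (int64_t(1) << (bits - 1));
  }

  int main() {
    printf("31 as uimm5:    %d\n", is_uimm(31, 5));    // immIU5-style operand: fits
    printf("2047 as simm12: %d\n", is_simm(2047, 12)); // immI12-style operand: fits
    printf("2048 as simm12: %d\n", is_simm(2048, 12)); // out of range
    return 0;
  }

The mask operands follow the same pattern, presumably additionally requiring the constant to be of the form (1 << k) - 1, which is what the is_int_mask/is_jlong_mask predicates appear to test.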
+// Immediate Operands +// Integer Immediate +operand immI() %{ + match(ConI); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU1() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 1)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU2() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 3)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU3() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 7)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU4() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 15)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU5() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 31)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU6() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 63)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU8() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 255)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI10() %{ + predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI12() %{ + predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M65536() %{ + predicate(n->get_int() == -65536); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for decrement +operand immI_M1() %{ + predicate(n->get_int() == -1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for zero +operand immI_0() %{ + predicate(n->get_int() == 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_1() %{ + predicate(n->get_int() == 1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_2() %{ + predicate(n->get_int() == 2); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_16() %{ + predicate(n->get_int() == 16); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() %{ + predicate(n->get_int() == 24); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +// Constant for long shifts +operand immI_32() %{ + predicate(n->get_int() == 32); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for byte-wide masking +operand immI_255() %{ + predicate(n->get_int() == 255); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_65535() %{ + predicate(n->get_int() == 65535); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_MaxI() %{ + predicate(n->get_int() == 2147483647); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M2047_2048() %{ + predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); + match(ConI); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Valid scale values for addressing modes +operand immI_0_3() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_31() %{ + predicate(n->get_int() >= 0 && 
n->get_int() <= 31); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_4095() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 4095); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_1_4() %{ + predicate(1 <= n->get_int() && (n->get_int() <= 4)); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_32_63() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M128_255() %{ + predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive integer mask +operand immI_nonneg_mask() %{ + predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate +operand immL() %{ + match(ConL); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +operand immLU5() %{ + predicate((0 <= n->get_long()) && (n->get_long() <= 31)); + match(ConL); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immL10() %{ + predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); + match(ConL); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immL12() %{ + predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 32-bit signed +operand immL32() +%{ + predicate(n->get_long() == (int)n->get_long()); + match(ConL); + + op_cost(15); + format %{ %} + interface(CONST_INTER); +%} + +// bit 3..6 zero +operand immL_M121() %{ + predicate(n->get_long() == -121L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..2 zero +operand immL_M8() %{ + predicate(n->get_long() == -8L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 1..2 zero +operand immL_M7() %{ + predicate(n->get_long() == -7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 2 zero +operand immL_M5() %{ + predicate(n->get_long() == -5L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..1 zero +operand immL_M4() %{ + predicate(n->get_long() == -4L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate zero +operand immL_0() %{ + predicate(n->get_long() == 0L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_7() %{ + predicate(n->get_long() == 7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_MaxUI() %{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(20); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_M2047_2048() %{ + predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_0_4095() %{ + predicate(n->get_long() >= 0 && n->get_long() <= 4095); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive long mask +operand immL_nonneg_mask() %{ + predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand 
immP() %{ + match(ConP); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immP_0() %{ + predicate(n->get_ptr() == 0); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immP_no_oop_cheap() %{ + predicate(!n->bottom_type()->isa_oop_ptr()); + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer for polling page +operand immP_poll() %{ + predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); + match(ConP); + op_cost(5); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immN() %{ + match(ConN); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immN_0() %{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() %{ + match(ConNKlass); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Single-precision floating-point immediate +operand immF() %{ + match(ConF); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Single-precision floating-point zero +operand immF_0() %{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point immediate +operand immD() %{ + match(ConD); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point zero +operand immD_0() %{ + predicate(jlong_cast(n->getd()) == 0); + match(ConD); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Register Operands +// Integer Register +operand mRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegI); + + format %{ %} + interface(REG_INTER); +%} + +operand no_Ax_mRegI() %{ + constraint(ALLOC_IN_RC(no_Ax_int_reg)); + match(RegI); + match(mRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand mS0RegI() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegI); + match(mRegI); + + format %{ "S0" %} + interface(REG_INTER); +%} + +operand mS1RegI() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegI); + match(mRegI); + + format %{ "S1" %} + interface(REG_INTER); +%} + +operand mS3RegI() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegI); + match(mRegI); + + format %{ "S3" %} + interface(REG_INTER); +%} + +operand mS4RegI() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegI); + match(mRegI); + + format %{ "S4" %} + interface(REG_INTER); +%} + +operand mS5RegI() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegI); + match(mRegI); + + format %{ "S5" %} + interface(REG_INTER); +%} + +operand mS6RegI() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegI); + match(mRegI); + + format %{ "S6" %} + interface(REG_INTER); +%} + +operand mS7RegI() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegI); + match(mRegI); + + format %{ "S7" %} + interface(REG_INTER); +%} + + +operand mT0RegI() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegI); + match(mRegI); + + format %{ "T0" %} + interface(REG_INTER); +%} + +operand mT1RegI() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegI); + match(mRegI); + + format %{ "T1" %} + interface(REG_INTER); +%} + +operand mT2RegI() %{ + constraint(ALLOC_IN_RC(t2_reg)); + match(RegI); + match(mRegI); + + format %{ "T2" %} + interface(REG_INTER); +%} + +operand mT3RegI() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegI); + match(mRegI); + 
+ format %{ "T3" %} + interface(REG_INTER); +%} + +operand mT8RegI() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegI); + match(mRegI); + + format %{ "T8" %} + interface(REG_INTER); +%} + +operand mT4RegI() %{ + constraint(ALLOC_IN_RC(t4_reg)); + match(RegI); + match(mRegI); + + format %{ "T4" %} + interface(REG_INTER); +%} + +operand mA0RegI() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegI); + match(mRegI); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand mA1RegI() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegI); + match(mRegI); + + format %{ "A1" %} + interface(REG_INTER); +%} + +operand mA2RegI() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegI); + match(mRegI); + + format %{ "A2" %} + interface(REG_INTER); +%} + +operand mA3RegI() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegI); + match(mRegI); + + format %{ "A3" %} + interface(REG_INTER); +%} + +operand mA4RegI() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegI); + match(mRegI); + + format %{ "A4" %} + interface(REG_INTER); +%} + +operand mA5RegI() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegI); + match(mRegI); + + format %{ "A5" %} + interface(REG_INTER); +%} + +operand mA6RegI() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegI); + match(mRegI); + + format %{ "A6" %} + interface(REG_INTER); +%} + +operand mA7RegI() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegI); + match(mRegI); + + format %{ "A7" %} + interface(REG_INTER); +%} + +operand mRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegN() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegN() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegN() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegN() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegN() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegN() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegN() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegN() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a4_RegN() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a5_RegN() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegN() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegN() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s0_RegN() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegN() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s2_RegN() %{ + constraint(ALLOC_IN_RC(s2_reg)); + match(RegN); + match(mRegN); + + 
format %{ %} + interface(REG_INTER); +%} + +operand s3_RegN() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegN() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegN() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s6_RegN() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegN() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand mRegP() %{ + constraint(ALLOC_IN_RC(p_reg)); + match(RegP); + match(a0_RegP); + + format %{ %} + interface(REG_INTER); +%} + +operand no_T8_mRegP() %{ + constraint(ALLOC_IN_RC(no_T8_p_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand no_Ax_mRegP() %{ + constraint(ALLOC_IN_RC(no_Ax_p_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegP() +%{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s3_RegP() +%{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegP() +%{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegP() +%{ + constraint(ALLOC_IN_RC(s5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s6_RegP() +%{ + constraint(ALLOC_IN_RC(s6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegP() +%{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegP() +%{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegP() +%{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t2_RegP() +%{ + constraint(ALLOC_IN_RC(t2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegP() +%{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegP() +%{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegP() +%{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegP() +%{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegP() +%{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegP() +%{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} 
+ interface(REG_INTER); +%} + +operand a4_RegP() +%{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + + +operand a5_RegP() +%{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegP() +%{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegP() +%{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegP() +%{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v1_RegP() +%{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand mRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); + + format %{ %} + interface(REG_INTER); +%} + +operand mRegI2L(mRegI reg) %{ + match(ConvI2L reg); + + format %{ %} + interface(REG_INTER); +%} + +operand v0RegL() %{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand v1RegL() %{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a0RegL() %{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegL); + match(mRegL); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand a1RegL() %{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a2RegL() %{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a3RegL() %{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t0RegL() %{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t1RegL() %{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t3RegL() %{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t8RegL() %{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a4RegL() %{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a5RegL() %{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a6RegL() %{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a7RegL() %{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s0RegL() %{ + constraint(ALLOC_IN_RC(s0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s1RegL() %{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s3RegL() %{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegL); + match(mRegL); + + 
format %{ %} + interface(REG_INTER); +%} + +operand s4RegL() %{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s7RegL() %{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +// Floating register operands +operand regF() %{ + constraint(ALLOC_IN_RC(flt_reg)); + match(RegF); + + format %{ %} + interface(REG_INTER); +%} + +//Double Precision Floating register operands +operand regD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(RegD); + + format %{ %} + interface(REG_INTER); +%} + +//----------Memory Operands---------------------------------------------------- +// Indirect Memory Operand +operand indirect(mRegP reg) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(reg); + + format %{ "[$reg] @ indirect" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset12(mRegP reg, immL12 off) +%{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg off); + + op_cost(10); + format %{ "[$reg + $off (12-bit)] @ indOffset12" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +operand indOffset12I2L(mRegP reg, immI12 off) +%{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg (ConvI2L off)); + + op_cost(10); + format %{ "[$reg + $off (12-bit)] @ indOffset12I2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register +operand indIndex(mRegP addr, mRegL index) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP addr index); + + op_cost(20); + format %{"[$addr + $index] @ indIndex" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale(0x0); + disp(0x0); + %} +%} + +operand indIndexI2L(mRegP reg, mRegI ireg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg (ConvI2L ireg)); + op_cost(10); + format %{ "[$reg + $ireg] @ indIndexI2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Operand +operand indirectNarrow(mRegN reg) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(DecodeN reg); + + format %{ "[$reg] @ indirectNarrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset12Narrow(mRegN reg, immL12 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeN reg) off); + + format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. 
+// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. + +// Comparision Code +operand cmpOp() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + + +// Comparision Code +// Comparison Code, unsigned compare. Used by FP also, with +// C2 (unordered) turned into GT or LT already. The other bits +// C0 and C3 are turned into Carry & Zero flags. +operand cmpOpU() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + + +//----------Special Memory Operands-------------------------------------------- +// Stack Slot Operand - This operand is used for loading and storing temporary +// values on the stack where a match requires a value to +// flow through memory. +operand stackSlotP(sRegP reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotI(sRegI reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotF(sRegF reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotD(sRegD reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotL(sRegL reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + + +//------------------------OPERAND CLASSES-------------------------------------- +opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, + indirectNarrow, indOffset12Narrow); +opclass memory_loadRange(indOffset12, indirect); + +opclass mRegLorI2L(mRegI2L, mRegL); +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architectures pipeline. 
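The COND_INTER codes declared by cmpOp and cmpOpU above (equal = 0x01 through no_overflow = 0x8) are what the branch patterns further down read back as $cop$$cmpcode / $cmp$$cmpcode and dispatch on when choosing between beq_long, bne_long, blt_long and bge_long. As a minimal, self-contained C++ sketch of that mapping only (the helper name cond_name is hypothetical and not part of this port):

#include <cstdio>

// Mirrors the switch statements in the ins_encode blocks of the long-offset
// branch instructions later in this file; unsigned compares read codes
// 0x03..0x06 as above/above_equal/below/below_equal instead.
static const char* cond_name(int cmpcode) {
  switch (cmpcode) {
    case 0x01: return "equal";
    case 0x02: return "not_equal";
    case 0x03: return "greater";
    case 0x04: return "greater_equal";
    case 0x05: return "less";
    case 0x06: return "less_equal";
    case 0x07: return "overflow";
    case 0x08: return "no_overflow";
    default:   return "unimplemented";
  }
}

int main() {
  for (int code = 0x01; code <= 0x08; ++code) {
    std::printf("0x%02x -> %s\n", code, cond_name(code));
  }
  return 0;
}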
+ +pipeline %{ + + //----------ATTRIBUTES--------------------------------------------------------- + attributes %{ + fixed_size_instructions; // Fixed size instructions + max_instructions_per_bundle = 1; // 1 instruction per bundle + max_bundles_per_cycle = 4; // Up to 4 bundles per cycle + bundle_unit_size=4; + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 16; // The processor fetches one line + instruction_fetch_units = 1; // of 16 bytes + + // List of nop instructions + nops( MachNop ); + %} + + //----------RESOURCES---------------------------------------------------------- + // Resources are the functional units available to the machine + + resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); + + //----------PIPELINE DESCRIPTION----------------------------------------------- + // Pipeline Description specifies the stages in the machine's pipeline + + // IF: fetch + // ID: decode + // RD: read + // CA: caculate + // WB: write back + // CM: commit + + pipe_desc(IF, ID, RD, CA, WB, CM); + + + //----------PIPELINE CLASSES--------------------------------------------------- + // Pipeline Classes describe the stages in which input and output are + // referenced by the hardware pipeline. + + //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ + single_instruction; + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+1; + DECODE : ID; + ALU : CA; + %} + + //No.19 Integer mult operation : dst <-- reg1 mult reg2 + pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+5; + DECODE : ID; + ALU2 : CA; + %} + + pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer div operation : dst <-- reg1 div reg2 + pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer mod operation : dst <-- reg1 mod reg2 + pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ + instruction_count(2); + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //no.16 load Long from memory : + pipe_class ialu_loadL(mRegL dst, memory mem) %{ + instruction_count(2); + mem : RD(read); + dst : WB(write)+5; + DECODE : ID; + MEM : RD; + %} + + //No.17 Store Long to Memory : + pipe_class ialu_storeL(mRegL src, memory mem) %{ + instruction_count(2); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ + single_instruction; + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.3 Integer move operation : dst <-- reg + pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : 
ID; + ALU : CA; + %} + + //No.4 No instructions : do nothing + pipe_class empty( ) %{ + instruction_count(0); + %} + + //No.5 UnConditional branch : + pipe_class pipe_jump( label labl ) %{ + multiple_bundles; + DECODE : ID; + BR : RD; + %} + + //No.6 ALU Conditional branch : + pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + + //no.7 load integer from memory : + pipe_class ialu_loadI(mRegI dst, memory mem) %{ + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.8 Store Integer to Memory : + pipe_class ialu_storeI(mRegI src, memory mem) %{ + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + + //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 + pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + //No.22 Floating div operation : dst <-- reg1 div reg2 + pipe_class fpu_div(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + pipe_class fcvt_I2D(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class fcvt_D2I(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class pipe_mfc1(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD; + %} + + pipe_class pipe_mtc1(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD(5); + %} + + //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 + pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + //No.11 Load Floating from Memory : + pipe_class fpu_loadF(regF dst, memory mem) %{ + instruction_count(1); + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.12 Store Floating to Memory : + pipe_class fpu_storeF(regF src, memory mem) %{ + instruction_count(1); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.13 FPU Conditional branch : + pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + +//No.14 Floating FPU reg operation : dst <-- op reg + pipe_class fpu1_regF(regF dst, regF src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + pipe_class long_memory_op() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(30); + %} + + pipe_class simple_call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + BR : RD; + %} + + pipe_class call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + %} + + //FIXME: + //No.9 Piple slow : for multi-instructions + pipe_class pipe_slow( ) %{ + instruction_count(20); + force_serialization; + multiple_bundles; + fixed_latency(50); + %} + +%} + + + +//----------INSTRUCTIONS------------------------------------------------------- +// +// match -- States which machine-independent subtree may be replaced +// by this instruction. 
+// ins_cost -- The estimated cost of this instruction is used by instruction +// selection to identify a minimum cost tree of machine +// instructions that matches a tree of machine-independent +// instructions. +// format -- A string providing the disassembly for this instruction. +// The value of an instruction's operand may be inserted +// by referring to it with a '$' prefix. +// opcode -- Three instruction opcodes may be provided. These are referred +// to within an encode class as $primary, $secondary, and $tertiary +// respectively. The primary opcode is commonly used to +// indicate the type of machine instruction, while secondary +// and tertiary are often used for prefix options or addressing +// modes. +// ins_encode -- A list of encode classes with parameters. The encode class +// name must have been defined in an 'enc_class' specification +// in the encode section of the architecture description. + + +// Load Integer +instruct loadI(mRegI dst, memory mem) %{ + match(Set dst (LoadI mem)); + + ins_cost(125); + format %{ "ld_w $dst, $mem #@loadI" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadI_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + + ins_cost(125); + format %{ "ld_w $dst, $mem #@loadI_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Integer (32 bit signed) to Byte (8 bit signed) +instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) +instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Short (16 bit signed) +instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); + + ins_cost(125); + format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) +instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe(ialu_loadI); +%} + +// Load Long. 
+instruct loadL(mRegL dst, memory mem) %{ +// predicate(!((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + + ins_cost(250); + format %{ "ld_d $dst, $mem #@loadL" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadL ); +%} + +// Load Long - UNaligned +instruct loadL_unaligned(mRegL dst, memory mem) %{ + match(Set dst (LoadL_unaligned mem)); + + // FIXME: Need more effective ldl/ldr + ins_cost(450); + format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadL ); +%} + +// Store Long +instruct storeL_reg(memory mem, mRegL src) %{ + match(Set mem (StoreL mem src)); + + ins_cost(200); + format %{ "st_d $mem, $src #@storeL_reg\n" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeL ); +%} + +instruct storeL_immL_0(memory mem, immL_0 zero) %{ + match(Set mem (StoreL mem zero)); + + ins_cost(180); + format %{ "st_d zero, $mem #@storeL_immL_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeL ); +%} + +// Load Compressed Pointer +instruct loadN(mRegN dst, memory mem) +%{ + match(Set dst (LoadN mem)); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2P(mRegP dst, memory mem) +%{ + match(Set dst (DecodeN (LoadN mem))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Pointer +instruct loadP(mRegP dst, memory mem) %{ + match(Set dst (LoadP mem)); + + ins_cost(125); + format %{ "ld_d $dst, $mem #@loadP" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Klass Pointer +instruct loadKlass(mRegP dst, memory mem) %{ + match(Set dst (LoadKlass mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadI ); +%} + +// Load narrow Klass Pointer +instruct loadNKlass(mRegN dst, memory mem) +%{ + match(Set dst (LoadNKlass mem)); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2PKlass(mRegP dst, memory mem) +%{ + match(Set dst (DecodeNKlass (LoadNKlass mem))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Constant +instruct loadConI(mRegI dst, immI src) %{ + match(Set dst src); + + ins_cost(120); + format %{ "mov $dst, $src #@loadConI" %} + ins_encode %{ + Register dst = $dst$$Register; + int value = $src$$constant; + __ li(dst, value); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct loadConL(mRegL dst, immL src) %{ + match(Set dst src); + ins_cost(120); + format %{ "li $dst, $src @ loadConL" %} + ins_encode %{ + __ li($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_regL_regL); +%} + +// Load Range +instruct loadRange(mRegI dst, memory_loadRange mem) %{ + match(Set dst (LoadRange mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadRange" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct storeP(memory mem, mRegP src ) %{ + match(Set mem (StoreP mem src)); + + ins_cost(125); + format %{ "st_d $src, $mem #@storeP" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +// Store NULL Pointer, mark word, or other simple pointer constant. 
+instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ + match(Set mem (StoreP mem zero)); + + ins_cost(125); + format %{ "mov $mem, $zero #@storeImmP_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Compressed Pointer +instruct storeN(memory mem, mRegN src) +%{ + match(Set mem (StoreN mem src)); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP2N(memory mem, mRegP src) +%{ + match(Set mem (StoreN mem (EncodeP src))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# @ storeP2N" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeNKlass(memory mem, mRegN src) +%{ + match(Set mem (StoreNKlass mem src)); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP2NKlass(memory mem, mRegP src) +%{ + match(Set mem (StoreNKlass mem (EncodePKlass src))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeImmN_immN_0(memory mem, immN_0 zero) +%{ + match(Set mem (StoreN mem zero)); + + ins_cost(125); // XXX + format %{ "storeN0 zero, $mem\t# compressed ptr" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Byte +instruct storeB_immB_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreB mem zero)); + + format %{ "mov $mem, zero #@storeB_immB_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeB(memory mem, mRegI src) %{ + match(Set mem (StoreB mem src)); + + ins_cost(125); + format %{ "st_b $src, $mem #@storeB" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeB_convL2I(memory mem, mRegL src) %{ + match(Set mem (StoreB mem (ConvL2I src))); + + ins_cost(125); + format %{ "st_b $src, $mem #@storeB_convL2I" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Byte (8bit signed) +instruct loadB(mRegI dst, memory mem) %{ + match(Set dst (LoadB mem)); + + ins_cost(125); + format %{ "ld_b $dst, $mem #@loadB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadB_convI2L(mRegL dst, memory mem) %{ + match(Set dst 
(ConvI2L (LoadB mem))); + + ins_cost(125); + format %{ "ld_b $dst, $mem #@loadB_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Byte (8bit UNsigned) +instruct loadUB(mRegI dst, memory mem) %{ + match(Set dst (LoadUB mem)); + + ins_cost(125); + format %{ "ld_bu $dst, $mem #@loadUB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadUB_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(125); + format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Short (16bit signed) +instruct loadS(mRegI dst, memory mem) %{ + match(Set dst (LoadS mem)); + + ins_cost(125); + format %{ "ld_h $dst, $mem #@loadS" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Short (16 bit signed) to Byte (8 bit signed) +instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +instruct loadS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(125); + format %{ "ld_h $dst, $mem #@loadS_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Store Integer Immediate +instruct storeI_immI_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreI mem zero)); + + format %{ "mov $mem, zero #@storeI_immI_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Integer +instruct storeI(memory mem, mRegI src) %{ + match(Set mem (StoreI mem src)); + + ins_cost(125); + format %{ "st_w $mem, $src #@storeI" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeI_convL2I(memory mem, mRegL src) %{ + match(Set mem (StoreI mem (ConvL2I src))); + + ins_cost(125); + format %{ "st_w $mem, $src #@storeI_convL2I" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Float +instruct loadF(regF dst, memory mem) %{ + match(Set dst (LoadF mem)); + + ins_cost(150); + format %{ "loadF $dst, $mem #@loadF" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadConP_general(mRegP dst, immP src) %{ + match(Set dst src); + + ins_cost(120); + format %{ "li $dst, $src #@loadConP_general" %} + + ins_encode %{ + Register dst = $dst$$Register; + long* value = (long*)$src$$constant; + + 
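+    // Constants that refer to metadata (Klass*) or oops are emitted as a
+    // patchable 52-bit immediate (patchable_li52) together with the matching
+    // relocation record, so the embedded pointer can be updated later; plain
+    // constants with no relocation are loaded directly with li.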
if($src->constant_reloc() == relocInfo::metadata_type){ + int klass_index = __ oop_recorder()->find_index((Klass*)value); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + + __ relocate(rspec); + __ patchable_li52(dst, (long)value); + } else if($src->constant_reloc() == relocInfo::oop_type){ + int oop_index = __ oop_recorder()->find_index((jobject)value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + __ relocate(rspec); + __ patchable_li52(dst, (long)value); + } else if ($src->constant_reloc() == relocInfo::none) { + __ li(dst, (long)value); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ + match(Set dst src); + + ins_cost(80); + format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} + + ins_encode %{ + if ($src->constant_reloc() == relocInfo::metadata_type) { + __ mov_metadata($dst$$Register, (Metadata*)$src$$constant); + } else { + __ li($dst$$Register, $src$$constant); + } + %} + + ins_pipe(ialu_regI_regI); +%} + + +instruct loadConP_poll(mRegP dst, immP_poll src) %{ + match(Set dst src); + + ins_cost(50); + format %{ "li $dst, $src #@loadConP_poll" %} + + ins_encode %{ + Register dst = $dst$$Register; + intptr_t value = (intptr_t)$src$$constant; + + __ li(dst, (jlong)value); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_immP_0(mRegP dst, immP_0 src) +%{ + match(Set dst src); + + ins_cost(50); + format %{ "mov $dst, R0\t# ptr" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + __ add_d(dst_reg, R0, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ + match(Set dst src); + format %{ "move $dst, R0\t# compressed NULL ptr" %} + ins_encode %{ + __ move($dst$$Register, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN(mRegN dst, immN src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_oop(dst, (jobject)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +instruct loadConNKlass(mRegN dst, immNKlass src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_klass(dst, (Klass*)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +//FIXME +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. +instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ + match(TailCall jump_target method_oop ); + ins_cost(300); + format %{ "JMP $jump_target \t# @TailCalljmpInd" %} + + ins_encode %{ + Register target = $jump_target$$Register; + Register oop = $method_oop$$Register; + + // RA will be used in generate_forward_exception() + __ push(RA); + + __ move(S3, oop); + __ jr(target); + %} + + ins_pipe( pipe_jump ); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is setup +// just prior to jumping to this handler. No code emitted. 
+instruct CreateException( a0_RegP ex_oop ) +%{ + match(Set ex_oop (CreateEx)); + + // use the following format syntax + format %{ "# exception oop is in A0; no code emitted @CreateException" %} + ins_encode %{ + // X86 leaves this function empty + __ block_comment("CreateException is empty in LA"); + %} + ins_pipe( empty ); +// ins_pipe( pipe_jump ); +%} + + +/* The mechanism of exception handling is clear now. + +- Common try/catch: + [stubGenerator_loongarch.cpp] generate_forward_exception() + |- V0, V1 are created + |- T4 <= SharedRuntime::exception_handler_for_return_address + `- jr T4 + `- the caller's exception_handler + `- jr OptoRuntime::exception_blob + `- here +- Rethrow(e.g. 'unwind'): + * The callee: + |- an exception is triggered during execution + `- exits the callee method through RethrowException node + |- The callee pushes exception_oop(T0) and exception_pc(RA) + `- The callee jumps to OptoRuntime::rethrow_stub() + * In OptoRuntime::rethrow_stub: + |- The VM calls _rethrow_Java to determine the return address in the caller method + `- exits the stub with tailjmpInd + |- pops exception_oop(V0) and exception_pc(V1) + `- jumps to the return address(usually an exception_handler) + * The caller: + `- continues processing the exception_blob with V0/V1 +*/ + +// Rethrow exception: +// The exception oop will come in the first argument position. +// Then JUMP (not call) to the rethrow stub code. +instruct RethrowException() +%{ + match(Rethrow); + + // use the following format syntax + format %{ "JMP rethrow_stub #@RethrowException" %} + ins_encode %{ + __ block_comment("@ RethrowException"); + + cbuf.set_insts_mark(); + cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); + + // call OptoRuntime::rethrow_stub to get the exception handler in parent method + __ patchable_jump((address)OptoRuntime::rethrow_stub()); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Branch Instructions --- long offset versions + +// Jump Direct +instruct jmpDir_long(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_long" %} + + ins_encode %{ + Label* L = $labl$$label; + __ jmp_far(*L); + %} + + ins_pipe( pipe_jump ); + //ins_pc_relative(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ blt_long(op2, op1, *L, true /* signed */); + break; + case 0x04: //above_equal + __ bge_long(op1, op2, *L, true /* signed */); + break; + case 0x05: //below + __ blt_long(op1, op2, *L, true /* signed */); + break; + case 0x06: //below_equal + __ bge_long(op2, op1, *L, true /* signed */); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ 
jmpLoopEnd_reg_immI_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = AT; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + __ li(op2, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ blt_long(op2, op1, *L, true /* signed */); + break; + case 0x04: //above_equal + __ bge_long(op1, op2, *L, true /* signed */); + break; + case 0x05: //below + __ blt_long(op1, op2, *L, true /* signed */); + break; + case 0x06: //below_equal + __ bge_long(op2, op1, *L, true /* signed */); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} + + ins_encode %{ + Label* L = $labl$$label; + switch($cop$$cmpcode) { + case 0x01: //equal + __ bne_long($cr$$Register, R0, *L); + break; + case 0x02: //not equal + __ beq_long($cr$$Register, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +// Conditional jumps +instruct branchConP_0_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConN2P_0_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) + { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ blt_long(op2, op1, *L, false /* unsigned */); + break; + case 0x04: //above_equal + __ bge_long(op1, op2, *L, false /* unsigned */); + break; + case 0x05: //below + __ blt_long(op1, op2, *L, false /* unsigned */); + break; + case 0x06: //below_equal + __ bge_long(op2, op1, *L, false /* unsigned */); + break; + 
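+      // Any other condition code is unexpected for a pointer compare and
+      // falls through to Unimplemented().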
default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_long" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_long" %} + ins_encode %{ + Register op1_reg = $op1$$Register; + Register op2_reg = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1_reg, op2_reg, *L); + break; + case 0x02: //not_equal + __ bne_long(op1_reg, op2_reg, *L); + break; + case 0x03: //above + __ blt_long(op2_reg, op1_reg, *L, false /* unsigned */); + break; + case 0x04: //above_equal + __ bge_long(op1_reg, op2_reg, *L, false /* unsigned */); + break; + case 0x05: //below + __ blt_long(op1_reg, op2_reg, *L, false /* unsigned */); + break; + case 0x06: //below_equal + __ bge_long(op2_reg, op1_reg, *L, false /* unsigned */); + break; + default: + Unimplemented(); + } + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ blt_long(op2, op1, *L, false /* unsigned */); + break; + case 0x04: //above_equal + __ bge_long(op1, op2, *L, false /* unsigned */); + break; + case 0x05: //below + __ blt_long(op1, op2, *L, false /* unsigned */); + break; + case 0x06: //below_equal + __ bge_long(op2, op1, *L, false /* unsigned */); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ li(AT, val); + switch(flag) { + case 0x01: //equal + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, AT, *L); + break; + case 0x03: //above + __ blt_long(AT, op1, *L, false /* unsigned */); + break; + case 0x04: //above_equal + __ bge_long(op1, AT, *L, false /* unsigned */); + break; + case 0x05: //below + __ blt_long(op1, AT, *L, false /* unsigned */); + break; + case 0x06: //below_equal + __ bge_long(AT, op1, *L, false /* unsigned */); + break; + default: + 
Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ blt_long(op2, op1, *L, true /* signed */); + break; + case 0x04: //above_equal + __ bge_long(op1, op2, *L, true /* signed */); + break; + case 0x05: //below + __ blt_long(op1, op2, *L, true /* signed */); + break; + case 0x06: //below_equal + __ bge_long(op2, op1, *L, true /* signed */); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, R0, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, R0, *L); + break; + case 0x03: //greater + __ blt_long(R0, op1, *L, true /* signed */); + break; + case 0x04: //greater_equal + __ bge_long(op1, R0, *L, true /* signed */); + break; + case 0x05: //less + __ blt_long(op1, R0, *L, true /* signed */); + break; + case 0x06: //less_equal + __ bge_long(R0, op1, *L, true /* signed */); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ li(AT, val); + switch(flag) { + case 0x01: //equal + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, AT, *L); + break; + case 0x03: //greater + __ blt_long(AT, op1, *L, true /* signed */); + break; + case 0x04: //greater_equal + __ bge_long(op1, AT, *L, true /* signed */); + break; + case 0x05: //less + __ blt_long(op1, AT, *L, true /* signed */); + break; + case 0x06: //less_equal + __ bge_long(AT, op1, *L, true /* signed */); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, R0, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, R0, *L); + break; + case 0x03: //above + __ bne_long(R0, op1, *L); + break; + case 0x04: //above_equal + __ beq_long(R0, R0, *L); + break; + case 0x05: //below + return; + break; + case 0x06: //below_equal + __ beq_long(op1, R0, *L); + break; + default: + 
Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = R0; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpUL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} + 
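+  // CmpUL compares the operands as unsigned 64-bit values, so the encodings
+  // of the branchConUL_* patterns pass false for the signedness flag of
+  // blt_long/bge_long, even though the cmpOp names (greater, less, ...)
+  // read like signed tests.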
ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = R0; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ li(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ li(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +//FIXME +instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: //not_equal 
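+      // As in the double variant below: compare with an ordered-equal test and
+      // branch when it is false. An unordered-equal compare would treat NaN
+      // operands as equal and break Java's 'f != f' NaN check (Float.isNaN).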
+ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: //greater + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: //greater_equal + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: //less + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: //less_equal + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + +instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: //not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: //greater + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: //greater_equal + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: //less + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: //less_equal + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + + +// ============================================================================ +// Branch Instructions -- short offset versions + +// Jump Direct +instruct jmpDir_short(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + if(&L) + __ b(L); + else + __ b(int(0)); + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + if (&L) + __ blt(op2, op1, L); + else + __ blt(op2, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bge(op1, op2, L); + else + __ bge(op1, op2, (int)0); + break; + case 0x05: //below + if (&L) + __ blt(op1, op2, L); + else + __ blt(op1, op2, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bge(op2, op1, L); + else + __ bge(op2, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + 
format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = AT; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + __ li(op2, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + if (&L) + __ blt(op2, op1, L); + else + __ blt(op2, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bge(op1, op2, L); + else + __ bge(op1, op2, (int)0); + break; + case 0x05: //below + if (&L) + __ blt(op1, op2, L); + else + __ blt(op1, op2, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bge(op2, op1, L); + else + __ bge(op2, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + switch($cop$$cmpcode) { + case 0x01: //equal + if (&L) + __ bnez($cr$$Register, L); + else + __ bnez($cr$$Register, (int)0); + break; + case 0x02: //not equal + if (&L) + __ beqz($cr$$Register, L); + else + __ beqz($cr$$Register, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Conditional jumps +instruct branchConP_0_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConN2P_0_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) + { + case 0x01: //equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label &L = *($labl$$label); + int flag = 
$cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + if (&L) + __ bltu(op2, op1, L); + else + __ bltu(op2, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bgeu(op1, op2, L); + else + __ bgeu(op1, op2, (int)0); + break; + case 0x05: //below + if (&L) + __ bltu(op1, op2, L); + else + __ bltu(op1, op2, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bgeu(op2, op1, L); + else + __ bgeu(op2, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_short" %} + ins_encode %{ + Register op1 = $op1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + default: + Unimplemented(); + } + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_short" %} + ins_encode %{ + Register op1_reg = $op1$$Register; + Register op2_reg = $op2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1_reg, op2_reg, L); + else + __ beq(op1_reg, op2_reg, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1_reg, op2_reg, L); + else + __ bne(op1_reg, op2_reg, (int)0); + break; + case 0x03: //above + if (&L) + __ bltu(op2_reg, op1_reg, L); + else + __ bltu(op2_reg, op1_reg, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bgeu(op1_reg, op2_reg, L); + else + __ bgeu(op1_reg, op2_reg, (int)0); + break; + case 0x05: //below + if (&L) + __ bltu(op1_reg, op2_reg, L); + else + __ bltu(op1_reg, op2_reg, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bgeu(op2_reg, op1_reg, L); + else + __ bgeu(op2_reg, op1_reg, (int)0); + break; + default: + Unimplemented(); + } + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + if (&L) + __ bltu(op2, op1, L); + else + __ bltu(op2, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bgeu(op1, op2, L); + else + __ bgeu(op1, op2, (int)0); + break; + case 0x05: //below + if (&L) + 
__ bltu(op1, op2, L); + else + __ bltu(op1, op2, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bgeu(op2, op1, L); + else + __ bgeu(op2, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ li(AT, val); + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //above + if (&L) + __ bltu(AT, op1, L); + else + __ bltu(AT, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bgeu(op1, AT, L); + else + __ bgeu(op1, AT, (int)0); + break; + case 0x05: //below + if (&L) + __ bltu(op1, AT, L); + else + __ bltu(op1, AT, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bgeu(AT, op1, L); + else + __ bgeu(AT, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + if (&L) + __ blt(op2, op1, L); + else + __ blt(op2, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bge(op1, op2, L); + else + __ bge(op1, op2, (int)0); + break; + case 0x05: //below + if (&L) + __ blt(op1, op2, L); + else + __ blt(op1, op2, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bge(op2, op1, L); + else + __ bge(op2, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + case 0x03: //greater + if (&L) + __ blt(R0, op1, L); + else + __ blt(R0, op1, (int)0); + break; + case 0x04: //greater_equal + if (&L) + __ bge(op1, R0, L); + else + __ bge(op1, R0, (int)0); + break; + case 0x05: //less + if (&L) + __ blt(op1, R0, L); + else + __ blt(op1, R0, (int)0); + break; + case 0x06: //less_equal + if (&L) + __ bge(R0, op1, L); + else + __ bge(R0, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); 
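+  // ins_short_branch(1) marks this as the short-offset form: C2's
+  // branch-shortening pass substitutes it for the matching *_long variant
+  // whenever the label ends up close enough for a single conditional branch.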
+ ins_short_branch(1); +%} + + +instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ li(AT, val); + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //greater + if (&L) + __ blt(AT, op1, L); + else + __ blt(AT, op1, (int)0); + break; + case 0x04: //greater_equal + if (&L) + __ bge(op1, AT, L); + else + __ bge(op1, AT, (int)0); + break; + case 0x05: //less + if (&L) + __ blt(op1, AT, L); + else + __ blt(op1, AT, (int)0); + break; + case 0x06: //less_equal + if (&L) + __ bge(AT, op1, L); + else + __ bge(AT, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + case 0x03: //above + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ b(L); + else + __ b((int)0); + break; + case 0x05: //below + return; + break; + case 0x06: //below_equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + case 0x03: //greater + if (&target) + __ blt(opr2_reg, opr1_reg, target); + else + __ blt(opr2_reg, opr1_reg, (int)0); + break; + case 0x04: //greater_equal + if (&target) + __ bge(opr1_reg, opr2_reg, target); + else + __ bge(opr1_reg, opr2_reg, (int)0); + break; + case 0x05: //less + if (&target) + __ blt(opr1_reg, opr2_reg, target); + else + __ blt(opr1_reg, opr2_reg, (int)0); + break; + case 0x06: //less_equal + if (&target) + __ bge(opr2_reg, opr1_reg, target); + else + __ bge(opr2_reg, opr1_reg, (int)0); + break; + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label 
labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + case 0x03: //greater + if (&target) + __ bltu(opr2_reg, opr1_reg, target); + else + __ bltu(opr2_reg, opr1_reg, (int)0); + break; + case 0x04: //greater_equal + if (&target) + __ bgeu(opr1_reg, opr2_reg, target); + else + __ bgeu(opr1_reg, opr2_reg, (int)0); + break; + case 0x05: //less + if (&target) + __ bltu(opr1_reg, opr2_reg, target); + else + __ bltu(opr1_reg, opr2_reg, (int)0); + break; + case 0x06: //less_equal + if (&target) + __ bgeu(opr2_reg, opr1_reg, target); + else + __ bgeu(opr2_reg, opr1_reg, (int)0); + break; + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beqz(opr1_reg, target); + else + __ beqz(opr1_reg, int(0)); + break; + + case 0x02: //not_equal + if (&target) + __ bnez(opr1_reg, target); + else + __ bnez(opr1_reg, (int)0); + break; + + case 0x03: //greater + if (&target) + __ blt(R0, opr1_reg, target); + else + __ blt(R0, opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if (&target) + __ bge(opr1_reg, R0, target); + else + __ bge(opr1_reg, R0, (int)0); + break; + + case 0x05: //less + if (&target) + __ blt(opr1_reg, R0, target); + else + __ blt(opr1_reg, R0, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ bge(R0, opr1_reg, target); + else + __ bge(R0, opr1_reg, int(0)); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpUL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beqz(opr1_reg, target); + else + __ beqz(opr1_reg, int(0)); + break; + + case 0x02: //not_equal + if (&target) + __ bnez(opr1_reg, target); + else + __ bnez(opr1_reg, (int)0); + break; + + case 0x03: //greater + if (&target) + __ bltu(R0, opr1_reg, target); + else + __ bltu(R0, opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if (&target) + __ bgeu(opr1_reg, R0, target); + else + __ bgeu(opr1_reg, R0, (int)0); + break; + + case 0x05: //less + if (&target) + __ bltu(opr1_reg, R0, target); + else + __ bltu(opr1_reg, R0, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ bgeu(R0, opr1_reg, target); + 
else + __ bgeu(R0, opr1_reg, int(0)); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ li(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + + case 0x03: //greater + if (&target) + __ blt(opr2_reg, opr1_reg, target); + else + __ blt(opr2_reg, opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if (&target) + __ bge(opr1_reg, opr2_reg, target); + else + __ bge(opr1_reg, opr2_reg, (int)0); + break; + + case 0x05: //less + if (&target) + __ blt(opr1_reg, opr2_reg, target); + else + __ blt(opr1_reg, opr2_reg, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ bge(opr2_reg, opr1_reg, target); + else + __ bge(opr2_reg, opr1_reg, (int)0); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ li(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + + case 0x03: //greater + if (&target) + __ bltu(opr2_reg, opr1_reg, target); + else + __ bltu(opr2_reg, opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if (&target) + __ bgeu(opr1_reg, opr2_reg, target); + else + __ bgeu(opr1_reg, opr2_reg, (int)0); + break; + + case 0x05: //less + if (&target) + __ bltu(opr1_reg, opr2_reg, target); + else + __ bltu(opr1_reg, opr2_reg, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ bgeu(opr2_reg, opr1_reg, target); + else + __ bgeu(opr2_reg, opr1_reg, (int)0); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +//FIXME +instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + if (&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x02: //not_equal + __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + if (&L) + __ 
bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x03: //greater + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x04: //greater_equal + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x05: //less + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x06: //less_equal + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + if (&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x02: //not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + if (&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x03: //greater + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x04: //greater_equal + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x05: //less + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x06: //less_equal + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +// =================== End of branch instructions ========================== + +// Call Runtime Instruction +instruct CallRuntimeDirect(method meth) %{ + match(CallRuntime ); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,runtime #@CallRuntimeDirect" %} + ins_encode( Java_To_Runtime( meth ) ); + ins_pipe( pipe_slow ); + ins_alignment(4); +%} + + + +//------------------------MemBar Instructions------------------------------- +//Memory barrier flavors + +instruct membar_acquire() %{ + match(MemBarAcquire); + ins_cost(400); + + format %{ "MEMBAR-acquire @ membar_acquire" %} + ins_encode %{ + __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); + %} + ins_pipe(empty); +%} + +instruct load_fence() %{ + match(LoadFence); + ins_cost(400); + + format %{ "MEMBAR @ load_fence" %} + ins_encode %{ + __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); + %} + ins_pipe(pipe_slow); +%} + +instruct membar_acquire_lock() +%{ + match(MemBarAcquireLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} + ins_encode(); + ins_pipe(empty); +%} + +instruct membar_release() %{ + match(MemBarRelease); + ins_cost(400); + + format %{ "MEMBAR-release @ membar_release" %} + + ins_encode %{ + // Attention: DO NOT 
DELETE THIS GUY! + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); + %} + + ins_pipe(pipe_slow); +%} + +instruct store_fence() %{ + match(StoreFence); + ins_cost(400); + + format %{ "MEMBAR @ store_fence" %} + + ins_encode %{ + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); + %} + + ins_pipe(pipe_slow); +%} + +instruct membar_release_lock() +%{ + match(MemBarReleaseLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} + ins_encode(); + ins_pipe(empty); +%} + + +instruct membar_volatile() %{ + match(MemBarVolatile); + ins_cost(400); + + format %{ "MEMBAR-volatile" %} + ins_encode %{ + if( !os::is_MP() ) return; // Not needed on single CPU + __ membar(__ StoreLoad); + + %} + ins_pipe(pipe_slow); +%} + +instruct unnecessary_membar_volatile() %{ + match(MemBarVolatile); + predicate(Matcher::post_store_load_barrier(n)); + ins_cost(0); + + size(0); + format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} + ins_encode( ); + ins_pipe(empty); +%} + +instruct membar_storestore() %{ + match(MemBarStoreStore); + + ins_cost(400); + format %{ "MEMBAR-storestore @ membar_storestore" %} + ins_encode %{ + __ membar(__ StoreStore); + %} + ins_pipe(empty); +%} + +//----------Move Instructions-------------------------------------------------- +instruct castX2P(mRegP dst, mRegL src) %{ + match(Set dst (CastX2P src)); + format %{ "castX2P $dst, $src @ castX2P" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_cost(10); + ins_pipe( ialu_regI_mov ); +%} + +instruct castP2X(mRegL dst, mRegP src ) %{ + match(Set dst (CastP2X src)); + + format %{ "mov $dst, $src\t #@castP2X" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_pipe( ialu_regI_mov ); +%} + +instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ movfr2gr_s(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ + match(Set dst (MoveI2F src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ movgr2fr_w(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ movfr2gr_d(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + Register src = as_Register($src$$reg); + + __ movgr2fr_d(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +//----------Conditional Move--------------------------------------------------- +// Conditional move +instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, 
mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = 
$src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI 
(Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = 
$tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + Label L; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t 
@cmovL_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, 
false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ + match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); + FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ + match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); + FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ + match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = 
$tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); + FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); + int flag = $cop$$cmpcode; + + // Use signed comparison here, because the most significant bit of the + // user-space virtual address must be 0. + __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); + %} + + ins_pipe( pipe_slow ); +%} + +//FIXME +instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +// Manifest a CmpL result in an integer register. Very painful. +// This is the test to avoid. 
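+// The rule below produces the result branch-free with two set-on-less-than
+// operations and a subtract, roughly  dst = (src1 > src2) - (src1 < src2),
+// which yields -1, 0 or 1.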
+instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{
+  match(Set dst (CmpL3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %}
+  ins_encode %{
+    Register opr1 = as_Register($src1$$reg);
+    Register opr2 = as_Register($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    __ slt(AT, opr1, opr2);
+    __ slt(dst, opr2, opr1);
+    __ sub_d(dst, dst, AT);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+//
+// less_result = -1
+// greater_result = 1
+// equal_result = 0
+// nan_result = -1
+//
+instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{
+  match(Set dst (CmpF3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = as_FloatRegister($src1$$reg);
+    FloatRegister src2 = as_FloatRegister($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    __ fcmp_clt_s(FCC0, src2, src1);
+    __ fcmp_cult_s(FCC1, src1, src2);
+    __ movcf2gr(dst, FCC0);
+    __ movcf2gr(AT, FCC1);
+    __ sub_d(dst, dst, AT);
+
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{
+  match(Set dst (CmpD3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = as_FloatRegister($src1$$reg);
+    FloatRegister src2 = as_FloatRegister($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    __ fcmp_clt_d(FCC0, src2, src1);
+    __ fcmp_cult_d(FCC1, src1, src2);
+    __ movcf2gr(dst, FCC0);
+    __ movcf2gr(AT, FCC1);
+    __ sub_d(dst, dst, AT);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{
+  match(Set dummy (ClearArray cnt base));
+  format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
+  ins_encode %{
+    //Assume cnt is the number of doublewords in an array to be cleared,
+    //and base points to the starting address of the array.
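+    // The loop below clears one doubleword per iteration: store zero,
+    // advance the address by wordSize, and decrement the count until it
+    // reaches zero.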
+ Register base = $base$$Register; + Register num = $cnt$$Register; + Label Loop, done; + + __ add_d(AT, base, R0); + __ beq(num, R0, done); + + __ move(T4, num); /* T4 = words */ + + __ bind(Loop); + __ st_d(R0, AT, 0); + __ addi_d(T4, T4, -1); + __ addi_d(AT, AT, wordSize); + __ bne(T4, R0, Loop); + + __ bind(done); + %} + ins_pipe( pipe_slow ); +%} + +instruct string_compare(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compare" %} + ins_encode %{ + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + Register str1 = $str1$$Register; + Register str2 = $str2$$Register; + Register cnt1 = $cnt1$$Register; + Register cnt2 = $cnt2$$Register; + Register result = $result$$Register; + + Label L, Loop, haveResult, done; + + // compute the and difference of lengths (in result) + __ sub_d(result, cnt1, cnt2); // result holds the difference of two lengths + + // compute the shorter length (in cnt1) + __ bge(cnt2, cnt1, Loop); + __ move(cnt1, cnt2); + + // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register + __ bind(Loop); // Loop begin + __ beq(cnt1, R0, done); + __ ld_hu(AT, str1, 0); + // compare current character + __ ld_hu(cnt2, str2, 0); + __ addi_d(str1, str1, 2); + __ bne(AT, cnt2, haveResult); + __ addi_d(str2, str2, 2); + __ addi_d(cnt1, cnt1, -1); // Loop end + __ b(Loop); + + __ bind(haveResult); + __ sub_d(result, AT, cnt2); + + __ bind(done); + %} + + ins_pipe( pipe_slow ); +%} + +// intrinsic optimization +instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); + + format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} + ins_encode %{ + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + Register str1 = $str1$$Register; + Register str2 = $str2$$Register; + Register cnt = $cnt$$Register; + Register tmp = $temp$$Register; + Register result = $result$$Register; + + Label Loop, True, False; + + __ addi_d(result, R0, 1); + __ beq(str1, str2, True); // same char[] ? 
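+    // result is pre-set to 1, so identical array references and a zero
+    // length branch straight to the True exit as equal.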
+ + __ beq(cnt, R0, True); + + __ bind(Loop); + + // compare current character + __ ld_hu(AT, str1, 0); + __ ld_hu(tmp, str2, 0); + __ addi_d(str1, str1, 2); + __ bne(AT, tmp, False); + __ addi_d(cnt, cnt, -1); + __ addi_d(str2, str2, 2); + __ bne(cnt, R0, Loop); + + __ b(True); + + __ bind(False); + __ addi_d(result, R0, 0); + + __ bind(True); + %} + + ins_pipe( pipe_slow ); +%} + +//----------Arithmetic Instructions------------------------------------------- +//----------Addition Instructions--------------------------------------------- +instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ add_w(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addI_Reg_imm(mRegI dst, mRegI src1, immI12 src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int imm = $src2$$constant; + + __ addi_w(dst, src1, imm); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ + match(Set dst (AddI src1 (LShiftI src2 shift))); + + format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + int sh = $shift$$constant; + __ alsl_w(dst, src2, src1, sh - 1); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ add_d(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ + match(Set dst (AddP src1 (AndL src2 M8))); + format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg_M8" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ bstrins_d(src2, R0, 2, 0); + __ add_d(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} + ins_encode %{ + Register src1 = $src1$$Register; + long src2 = $src2$$constant; + Register dst = $dst$$Register; + + __ addi_d(dst, src1, src2); + %} + ins_pipe( ialu_regI_imm16 ); +%} + +instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); + + format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + int sh = $shift$$constant; + __ alsl_d(dst, src2, src1, sh - 1); + %} + + ins_pipe(ialu_regI_regI); +%} + +// Add Long Register with Register +instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (AddL src1 src2)); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = 
as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ add_d(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) +%{ + match(Set dst (AddL src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + int src2_imm = $src2$$constant; + + __ addi_d(dst_reg, src1_reg, src2_imm); + %} + + ins_pipe( ialu_regL_regL ); +%} + +//----------Subtraction Instructions------------------------------------------- +// Integer Subtraction Instructions +instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(100); + + format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ sub_w(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegI src1, immI_M2047_2048 src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(80); + + format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ addi_w(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ + match(Set dst (SubI zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negI_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ sub_w(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ + match(Set dst (SubL zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negL_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ sub_d(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(80); + + format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ addi_d(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Subtract Long Register with Register. 
+instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(100); + format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src1 = as_Register($src1$$reg); + Register src2 = as_Register($src2$$reg); + + __ sub_d(dst, src1, src2); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Integer MOD with Register +instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (ModI src1 src2)); + ins_cost(300); + format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ mod_w(dst, src1, src2); + %} + + //ins_pipe( ialu_mod ); + ins_pipe( ialu_regI_regI ); +%} + +instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (ModL src1 src2)); + format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ mod_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (MulI src1 src2)); + + ins_cost(300); + format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ mul_w(dst, src1, src2); + %} + ins_pipe( ialu_mult ); +%} + +instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (DivI src1 src2)); + + ins_cost(300); + format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ div_w(dst, src1, src2); + + %} + ins_pipe( ialu_mod ); +%} + +instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ + match(Set dst (DivF src1 src2)); + + ins_cost(300); + format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fdiv_s(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ + match(Set dst (DivD src1 src2)); + + ins_cost(300); + format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fdiv_d(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (MulL src1 src2)); + format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ mul_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (MulHiL src1 src2)); + format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ mulh_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (DivL src1 src2)); + format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} + + 
ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ div_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (AddF src1 src2)); + format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fadd_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (SubF src1 src2)); + format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsub_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} +instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (AddD src1 src2)); + format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fadd_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (SubD src1 src2)); + format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsub_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negF_reg(regF dst, regF src) %{ + match(Set dst (NegF src)); + format %{ "negF $dst, $src @negF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fneg_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negD_reg(regD dst, regD src) %{ + match(Set dst (NegD src)); + format %{ "negD $dst, $src @negD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fneg_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (MulF src1 src2)); + format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fmul_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ + match(Set dst (AddF (MulF src1 src2) src3)); + // For compatibility reason (e.g. on the Loongson platform), disable this guy. 
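+  // (The very large ins_cost below effectively disables this rule, so the
+  // matcher prefers the cheaper separate mulF/addF rules instead.)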
+ ins_cost(44444); + format %{ "maddF $dst, $src1, $src2, $src3 @maddF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister src3 = $src3$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fmadd_s(dst, src1, src2, src3); + %} + ins_pipe( fpu_regF_regF ); +%} + +// Mul two double precision floating piont number +instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (MulD src1 src2)); + format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fmul_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ + match(Set dst (AddD (MulD src1 src2) src3)); + // For compatibility reason (e.g. on the Loongson platform), disable this guy. + ins_cost(44444); + format %{ "maddD $dst, $src1, $src2, $src3 @maddD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister src3 = $src3$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fmadd_d(dst, src1, src2, src3); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct absF_reg(regF dst, regF src) %{ + match(Set dst (AbsF src)); + ins_cost(100); + format %{ "absF $dst, $src @absF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fabs_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +// intrinsics for math_native. +// AbsD SqrtD CosD SinD TanD LogD Log10D + +instruct absD_reg(regD dst, regD src) %{ + match(Set dst (AbsD src)); + ins_cost(100); + format %{ "absD $dst, $src @absD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fabs_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtD_reg(regD dst, regD src) %{ + match(Set dst (SqrtD src)); + ins_cost(100); + format %{ "SqrtD $dst, $src @sqrtD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsqrt_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtF_reg(regF dst, regF src) %{ + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + ins_cost(100); + format %{ "SqrtF $dst, $src @sqrtF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsqrt_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} +//----------------------------------Logical Instructions---------------------- +//__________________________________Integer Logical Instructions------------- + +//And Instuctions +// And Register with Immediate +instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ + match(Set dst (AndI src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ andi(dst, src, val); + + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ + match(Set dst (AndI src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = 
$src1$$Register; + int size = Assembler::is_int_mask($mask$$constant); + + __ bstrpick_w(dst, src, size-1, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ + match(Set dst (AndL src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int size = Assembler::is_jlong_mask($mask$$constant); + + __ bstrpick_d(dst, src, size-1, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ + match(Set dst (XorI src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ + match(Set dst (XorI src1 M1)); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ orn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ + match(Set dst (XorI (ConvL2I src1) M1)); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ orn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ + match(Set dst (XorL src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI mask (LoadB mem))); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_lmask" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadB mem) mask)); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_rmask" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ andr(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI src1 (XorI src2 M1))); + + format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ andn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI src1 (XorI src2 M1))); + + format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} + 
ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ orn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI (XorI src1 M1) src2)); + + format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ andn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI (XorI src1 M1) src2)); + + format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ orn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +// And Long Register with Register +instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ + match(Set dst (AndL src1 src2)); + format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ andr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ + match(Set dst (AndL src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ + match(Set dst (ConvL2I (AndL src1 src2))); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ + match(Set dst (AndL dst M8)); + ins_cost(60); + + format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 2, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ + match(Set dst (AndL dst M5)); + ins_cost(60); + + format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 2, 2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ + match(Set dst (AndL dst M7)); + ins_cost(60); + + format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 2, 1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ + match(Set dst (AndL dst M4)); + ins_cost(60); + + format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 1, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ + match(Set dst (AndL dst M121)); + ins_cost(60); + + format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 6, 3); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Or Long 
Register with Register +instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (OrL src1 src2)); + format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; + + __ orr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L src2) %{ + match(Set dst (OrL (CastP2X src1) src2)); + format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; + + __ orr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor Long Register with Register +instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (XorL src1 src2)); + format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ xorr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left by 5-bit immediate +instruct salI_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ slli_w(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct salL2I_Reg_imm(mRegI dst, mRegL src, immIU5 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ slli_w(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ + match(Set dst (AndI (LShiftI src shift) mask)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ slli_w(dst, src, 16); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); + + format %{ "andi $dst, $src, 7\t# @land7_2_s" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ andi(dst, src, 7); + %} + ins_pipe(ialu_regI_regI); +%} + +// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. +// This idiom is used by the compiler the i2s bytecode. +instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); + + format %{ "i2s $dst, $src\t# @i2s" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ ext_w_h(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + +// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. +// This idiom is used by the compiler for the i2b bytecode. 
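+// As in the i2s rule above, the (x << 24) >> 24 sign-extension pattern is
+// collapsed into a single ext_w_b instead of a shift pair.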
+instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) +%{ + match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); + + format %{ "i2b $dst, $src\t# @i2b" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ ext_w_b(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + + +instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ slli_w(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Shift Left by 8-bit immediate +instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shamt = $shift$$Register; + __ sll_w(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + + +// Shift Left Long 6-bit immI +instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ slli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left Long +instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ sll_d(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long 6-bit +instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srai_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (RShiftL src shift))); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srai_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long arithmetically +instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ sra_d(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long logically +instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(100); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ srl_d(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct 
slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ + match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); + ins_cost(80); + format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (URShiftL src shift))); + predicate(n->in(1)->in(2)->get_int() > 32); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor Instructions +// Xor Register with Register +instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (XorI src1 src2)); + + format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ xorr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Or Instructions +instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} + ins_encode %{ + __ ori($dst$$Register, $src1$$Register, $src2$$constant); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Or Register with Register +instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} + ins_encode %{ + Register dst = 
$dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ + match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); + predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); + + format %{ "rotri_w $dst, $src, 1 ...\n\t" + "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int rshift = $rshift$$constant; + + __ rotri_w(dst, src, 1); + if (rshift - 1) { + __ srli_w(dst, dst, rshift - 1); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ + match(Set dst (OrI src1 (CastP2X src2))); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right by 5-bit immediate +instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ + match(Set dst (URShiftI src shift)); + //effect(KILL cr); + + format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + + __ srli_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ + match(Set dst (AndI (URShiftI src shift) mask)); + + format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int pos = $shift$$constant; + int size = Assembler::is_int_mask($mask$$constant); + + __ bstrpick_w(dst, src, pos+size-1, pos); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); + + ins_cost(100); + format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_w(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + 
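+    // The predicate guarantees lshift + rshift == 64, so the rotate-left by
+    // $lshift is emitted as a rotate-right by $rshift.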
+ __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); + + ins_cost(100); + format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_w(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right +instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (URShiftI src shift)); + + format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ srl_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + __ srai_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ sra_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +//----------Convert Int to Boolean--------------------------------------------- + +instruct convI2B(mRegI dst, mRegI src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convI2B $dst, $src @ convI2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, src); + } else { + __ move(AT, src); + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct convI2L_reg( mRegL dst, mRegI src) %{ + match(Set dst (ConvI2L src)); + + ins_cost(100); + format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if(dst != 
src) __ slli_w(dst, src, 0); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ + match(Set dst (ConvL2I src)); + + format %{ "MOV $dst, $src @ convL2I_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ slli_w(dst, src, 0); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct convL2D_reg( regD dst, mRegL src ) %{ + match(Set dst (ConvL2D src)); + format %{ "convL2D $dst, $src @ convL2D_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ movgr2fr_d(dst, src); + __ ffint_d_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + + +// Convert double to int. +// If the double is NaN, stuff a zero in instead. +instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ + match(Set dst (ConvD2I src)); + effect(USE src, TEMP tmp); + + format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} + + ins_encode %{ + __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ + match(Set dst (ConvD2L src)); + effect(USE src, TEMP tmp); + + format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} + + ins_encode %{ + __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + +// Convert float to int. +// If the float is NaN, stuff a zero in instead. +instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ + match(Set dst (ConvF2I src)); + effect(USE src, TEMP tmp); + + format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} + + ins_encode %{ + __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ + match(Set dst (ConvF2L src)); + effect(USE src, TEMP tmp); + + format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} + + ins_encode %{ + __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convL2F_reg( regF dst, mRegL src ) %{ + match(Set dst (ConvL2F src)); + format %{ "convl2f $dst, $src @ convL2F_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + Register src = as_Register($src$$reg); + Label L; + + __ movgr2fr_d(dst, src); + __ ffint_s_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convI2F_reg( regF dst, mRegI src ) %{ + match(Set dst (ConvI2F src)); + format %{ "convi2f $dst, $src @ convI2F_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + + __ movgr2fr_w(dst, src); + __ ffint_s_w(dst, dst); + %} + + ins_pipe( fpu_regF_regF ); +%} + +instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ + match(Set dst (CmpLTMask p zero)); + ins_cost(100); + + format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} + ins_encode %{ + Register src = $p$$Register; + Register dst = $dst$$Register; + + __ srai_w(dst, src, 31); + %} + ins_pipe( pipe_slow ); +%} + + +instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ + match(Set dst (CmpLTMask p q)); + ins_cost(400); + + format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} + ins_encode %{ + Register p = $p$$Register; + Register q = $q$$Register; + Register dst 
= $dst$$Register; + + __ slt(dst, p, q); + __ sub_d(dst, R0, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct convP2B(mRegI dst, mRegP src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convP2B $dst, $src @ convP2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, src); + } else { + __ move(AT, src); + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + + +instruct convI2D_reg_reg(regD dst, mRegI src) %{ + match(Set dst (ConvI2D src)); + format %{ "conI2D $dst, $src @convI2D_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + __ movgr2fr_w(dst ,src); + __ ffint_d_w(dst, dst); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convF2D_reg_reg(regD dst, regF src) %{ + match(Set dst (ConvF2D src)); + format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ fcvt_d_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convD2F_reg_reg(regF dst, regD src) %{ + match(Set dst (ConvD2F src)); + format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ fcvt_s_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +// Convert oop pointer into compressed form +instruct encodeHeapOop(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop $dst,$src" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ encode_heap_oop(dst, src); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} + ins_encode %{ + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && + n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + + __ decode_heap_oop(d, s); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_heap_oop_not_null(d, s); + } else { + __ decode_heap_oop_not_null(d); + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ + match(Set dst (EncodePKlass src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} + ins_encode %{ + __ encode_klass_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ + match(Set dst (DecodeNKlass src)); + format %{ 
"decode_heap_klass_not_null $dst,$src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_klass_not_null(d, s); + } else { + __ decode_klass_not_null(d); + } + %} + ins_pipe( ialu_regL_regL ); +%} + +//FIXME +instruct tlsLoadP(mRegP dst) %{ + match(Set dst (ThreadLocal)); + + ins_cost(0); + format %{ " get_thread in $dst #@tlsLoadP" %} + ins_encode %{ + Register dst = $dst$$Register; +#ifdef OPT_THREAD + __ move(dst, TREG); +#else + __ get_thread(dst); +#endif + %} + + ins_pipe( ialu_loadI ); +%} + + +instruct checkCastPP( mRegP dst ) %{ + match(Set dst (CheckCastPP dst)); + + format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} + ins_encode( /*empty encoding*/ ); + ins_pipe( empty ); +%} + +instruct castPP(mRegP dst) +%{ + match(Set dst (CastPP dst)); + + size(0); + format %{ "# castPP of $dst" %} + ins_encode(/* empty encoding */); + ins_pipe(empty); +%} + +instruct castII( mRegI dst ) %{ + match(Set dst (CastII dst)); + format %{ "#castII of $dst empty encoding" %} + ins_encode( /*empty encoding*/ ); + ins_cost(0); + ins_pipe( empty ); +%} + +// Return Instruction +// Remove the return address & jump to it. +instruct Ret() %{ + match(Return); + format %{ "RET #@Ret" %} + + ins_encode %{ + __ jr(RA); + %} + + ins_pipe( pipe_jump ); +%} + + + +// Tail Jump; remove the return address; jump to target. +// TailCall above leaves the return address around. +// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). +// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a +// "restore" before this instruction (in Epilogue), we need to materialize it +// in %i0. +//FIXME +instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ + match( TailJump jump_target ex_oop ); + ins_cost(200); + format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} + ins_encode %{ + Register target = $jump_target$$Register; + + // V0, V1 are indicated in: + // [stubGenerator_loongarch.cpp] generate_forward_exception() + // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() + // + Register oop = $ex_oop$$Register; + Register exception_oop = V0; + Register exception_pc = V1; + + __ move(exception_pc, RA); + __ move(exception_oop, oop); + + __ jr(target); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Procedure Call/Return Instructions +// Call Java Static Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. +instruct CallStaticJavaDirect(method meth) %{ + match(CallStaticJava); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,static #@CallStaticJavaDirect " %} + ins_encode( Java_Static_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +// Call Java Dynamic Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. 
+instruct CallDynamicJavaDirect(method meth) %{ + match(CallDynamicJava); + effect(USE meth); + + ins_cost(300); + format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" + "CallDynamic @ CallDynamicJavaDirect" %} + ins_encode( Java_Dynamic_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +instruct CallLeafNoFPDirect(method meth) %{ + match(CallLeafNoFP); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF_NOFP,runtime " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +// Prefetch instructions. + +instruct prefetchr( memory mem ) %{ + match(PrefetchRead mem); + ins_cost(125); + + format %{ "pref $mem\t# Prefetch into temporal cache for read @ prefetchr" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ add_d(AT, as_Register(base), as_Register(index)); + } else { + __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); + } + } else { + __ move(AT, as_Register(base)); + } + if( Assembler::is_simm(disp, 12) ) { + __ addi_d(AT, AT, disp); + } else { + __ li(T4, disp); + __ add_d(AT, AT, T4); + } + __ preld(0, AT, 0); //hint: 0:load + %} + ins_pipe(pipe_slow); +%} + +instruct prefetchw( memory mem ) %{ + match(PrefetchWrite mem); + ins_cost(125); + format %{ "pref $mem\t# Prefetch to temporal cache for write @ prefetchw" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ add_d(AT, as_Register(base), as_Register(index)); + } else { + __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); + } + } else { + __ move(AT, as_Register(base)); + } + if( Assembler::is_simm(disp, 12) ) { + __ addi_d(AT, AT, disp); + } else { + __ li(T4, disp); + __ add_d(AT, AT, T4); + } + __ preld(8, AT, 0); //hint: 8:store + %} + ins_pipe(pipe_slow); +%} + +// Prefetch instructions for allocation. 
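+// The memory operand is flattened into base/index/scale/disp at match time; the
+// prefetch encodings above and below rebuild the effective address by hand
+// (add_d/alsl_d for the scaled index, then addi_d when disp fits in simm12,
+// otherwise li + add_d) before issuing preld. Hint 0 requests a prefetch for
+// load and hint 8 a prefetch for store, so the allocation prefetch below uses
+// hint 8. (Illustrative note only; see the encodings for the exact sequences.)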
+ +instruct prefetchAlloc(memory mem) %{ + match(PrefetchAllocation mem); + ins_cost(125); + format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if (index != 0) { + if (scale == 0) { + __ add_d(AT, as_Register(base), as_Register(index)); + } else { + __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); + } + + if (Assembler::is_simm(disp, 12)) { + __ preld(8, AT, disp); + } else { + __ li(T4, disp); + __ add_d(AT, AT, T4); + __ preld(8, AT, 0); + } + } else { + if (Assembler::is_simm(disp, 12)) { + __ preld(8, as_Register(base), disp); + } else { + __ li(T4, disp); + __ add_d(AT, as_Register(base), T4); + __ preld(8, AT, 0); + } + } + %} + ins_pipe(pipe_slow); +%} + + +// Call runtime without safepoint +instruct CallLeafDirect(method meth) %{ + match(CallLeaf); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +// Load Char (16bit unsigned) +instruct loadUS(mRegI dst, memory mem) %{ + match(Set dst (LoadUS mem)); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadC" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadUS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Store Char (16bit unsigned) +instruct storeC(memory mem, mRegI src) %{ + match(Set mem (StoreC mem src)); + + ins_cost(125); + format %{ "storeC $src, $mem @ storeC" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); + %} + ins_pipe( ialu_loadI ); +%} + +instruct storeC_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreC mem zero)); + + ins_cost(125); + format %{ "storeC $zero, $mem @ storeC_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct loadConF_immF_0(regF dst, immF_0 zero) %{ + match(Set dst zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConF_immF_0\n"%} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + + __ movgr2fr_w(dst, R0); + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConF(regF dst, immF src) %{ + match(Set dst src); + ins_cost(125); + + format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm(con_offset, 12)) { + __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ li(AT, con_offset); + __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); + } + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConD_immD_0(regD dst, immD_0 zero) %{ + match(Set dst zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConD_immD_0"%} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ movgr2fr_d(dst, R0); + %} + ins_pipe( fpu_loadF ); +%} + +instruct loadConD(regD dst, immD src) %{ + match(Set dst src); + ins_cost(125); 
+ + format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm(con_offset, 12)) { + __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ li(AT, con_offset); + __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); + } + %} + ins_pipe( fpu_loadF ); +%} + +// Store register Float value (it is faster than store from FPU register) +instruct storeF_reg( memory mem, regF src) %{ + match(Set mem (StoreF mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeF_reg" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); + %} + ins_pipe( fpu_storeF ); +%} + +instruct storeF_immF_0( memory mem, immF_0 zero) %{ + match(Set mem (StoreF mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Double +instruct loadD(regD dst, memory mem) %{ + match(Set dst (LoadD mem)); + + ins_cost(150); + format %{ "loadD $dst, $mem #@loadD" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Double - UNaligned +instruct loadD_unaligned(regD dst, memory mem ) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(250); + // FIXME: Need more effective ldl/ldr + format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct storeD_reg( memory mem, regD src) %{ + match(Set mem (StoreD mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeD_reg" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); + %} + ins_pipe( fpu_storeF ); +%} + +instruct storeD_immD_0( memory mem, immD_0 zero) %{ + match(Set mem (StoreD mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +instruct loadSSI(mRegI dst, stackSlotI src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); + __ ld_w($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSI(stackSlotI dst, mRegI src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); + __ st_w($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSL(mRegL dst, stackSlotL src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld_d $dst, $src\t# long stk @ loadSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); + __ ld_d($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSL(stackSlotL dst, mRegL src) +%{ + 
match(Set dst src); + + ins_cost(100); + format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); + __ st_d($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSP(mRegP dst, stackSlotP src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); + __ ld_d($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSP(stackSlotP dst, mRegP src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); + __ st_d($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSF(regF dst, stackSlotF src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); + __ fld_s($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSF(stackSlotF dst, regF src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); + __ fst_s($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +// Use the same format since predicate() can not be used here. +instruct loadSSD(regD dst, stackSlotD src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); + __ fld_d($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSD(stackSlotD dst, regD src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); + __ fst_d($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ + match(Set cr (FastLock object box)); + effect(TEMP tmp, TEMP scr); + ins_cost(300); + format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ + match(Set cr (FastUnlock object box)); + effect(TEMP tmp, TEMP scr); + ins_cost(300); + format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +// Store CMS card-mark Immediate 0 +instruct storeImmCM_order(memory mem, immI_0 zero) %{ + match(Set mem (StoreCM mem zero)); + predicate(UseConcMarkSweepGC && !UseCondCardMark); + ins_cost(100); + format %{ "StoreCM MEMBAR storestore\n\t" + "st_b $mem, zero\t! 
card-mark imm0" %} + ins_encode %{ + __ membar(__ StoreStore); + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeImmCM(memory mem, immI_0 zero) %{ + match(Set mem (StoreCM mem zero)); + + ins_cost(150); + format %{ "st_b $mem, zero\t! card-mark imm0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +// Die now +instruct ShouldNotReachHere( ) +%{ + match(Halt); + ins_cost(300); + + // Use the following format syntax + format %{ "ILLTRAP ;#@ShouldNotReachHere" %} + ins_encode %{ + // Here we should emit illtrap! + __ brk(18); + %} + ins_pipe( pipe_jump ); +%} + +instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) +%{ + predicate(Universe::narrow_oop_shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + int disp = $mem$$disp; + + __ addi_d(dst, base, disp); + %} + ins_pipe( ialu_regI_imm16 ); +%} + +instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) +%{ + match(Set dst (AddP reg (LShiftL lreg scale))); + + ins_cost(110); + format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = $reg$$Register; + Register index = $lreg$$Register; + int scale = $scale$$constant; + + if (scale == 0) { + __ add_d($dst$$Register, $reg$$Register, index); + } else { + __ alsl_d(dst, index, base, scale - 1); + } + %} + + ins_pipe( ialu_regI_imm16 ); +%} + + +// ============================================================================ +// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass +// array for an instance of the superklass. Set a hidden internal cache on a +// hit (cache is checked with exposed code in gen_subtype_check()). Return +// NZ for a miss or zero for a hit. The encoding ALSO sets flags. +instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ + match(Set result (PartialSubtypeCheck sub super)); + effect(KILL tmp); + ins_cost(1100); // slightly larger than the next version + format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} + + ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); + ins_pipe( pipe_slow ); +%} + +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. + +instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + + format %{ "move AT, $newval\n\t" + "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" + "move $cr, AT\n" %} + ins_encode%{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); + + int index = $heap_top_ptr$$index; + int scale = $heap_top_ptr$$scale; + int disp = $heap_top_ptr$$disp; + + guarantee(Assembler::is_simm(disp, 12), ""); + + if (index != 0) { + __ stop("in storePConditional: index != 0"); + } else { + __ move(AT, newval); + __ sc_d(AT, addr); + __ move($cr$$Register, AT); + } + %} + ins_pipe(long_memory_op); +%} + +// Conditional-store of an int value. +// AT flag is set on success, reset otherwise. 
+instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ + match(Set cr (StoreIConditional mem (Binary oldval newval))); + format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} + + ins_encode %{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Register cr = $cr$$Register; + Address addr(as_Register($mem$$base), $mem$$disp); + + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + guarantee(Assembler::is_simm(disp, 12), ""); + + if (index != 0) { + __ stop("in storeIConditional: index != 0"); + } else { + if (cr != addr.base() && cr != oldval && cr != newval) { + __ cmpxchg32(addr, oldval, newval, cr, true, false, true); + } else { + __ cmpxchg32(addr, oldval, newval, AT, true, false, true); + __ move(cr, AT); + } + } + %} + + ins_pipe(long_memory_op); +%} + +// Conditional-store of a long value. +// AT flag is set on success, reset otherwise. Implemented with a CMPXCHG. +instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) +%{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); + + format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} + ins_encode%{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Register cr = $cr$$Register; + Address addr(as_Register($mem$$base), $mem$$disp); + + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + guarantee(Assembler::is_simm(disp, 12), ""); + + if (index != 0) { + __ stop("in storeLConditional: index != 0"); + } else { + if (cr != addr.base() && cr != oldval && cr != newval) { + __ cmpxchg(addr, oldval, newval, cr, false, true); + } else { + __ cmpxchg(addr, oldval, newval, AT, false, true); + __ move(cr, AT); + } + } + %} + ins_pipe(long_memory_op); +%} + +// Implement LoadPLocked. Must be ordered against changes of the memory location +// by storePConditional.
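+// A rough sketch of the intended pairing, as emitted by loadPLocked below and
+// storePConditional above (register names are illustrative only):
+//   ll_d   top, [heap_top]      // load-linked: read the current heap top
+//   ...                         // new_top is computed by separate nodes
+//   move   AT,  new_top
+//   sc_d   AT,  [heap_top]      // store-conditional: AT becomes 1 on success, 0 on failure
+//   move   cr,  AT
+// The sc_d succeeds only if heap_top was not written in between, which is why the
+// LoadPLocked load must stay ordered with respect to storePConditional.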
+instruct loadPLocked(mRegP dst, memory mem) %{ + match(Set dst (LoadPLocked mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} + size(12); + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ + match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg32(addr, oldval, newval, res, true, false, true); + } else { + __ cmpxchg32(addr, oldval, newval, AT, true, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ + predicate(VM_Version::supports_cx8()); + match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg(addr, oldval, newval, res, false, true); + } else { + __ cmpxchg(addr, oldval, newval, AT, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg(addr, oldval, newval, res, false, true); + } else { + __ cmpxchg(addr, oldval, newval, AT, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg32(addr, oldval, newval, res, false, false, true); + } else { + __ cmpxchg32(addr, oldval, newval, AT, false, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +//----------Max and Min-------------------------------------------------------- + +// Min Register with Register (generic version) +instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MinI dst src)); + //effect(KILL flags); + ins_cost(80); + + format %{ "MIN $dst, $src @minI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, src, dst); + __ masknez(dst, dst, AT); + __ maskeqz(AT, src, AT); + __ OR(dst, 
dst, AT); + %} + + ins_pipe( pipe_slow ); +%} + +// Max Register with Register (generic version) +instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MaxI dst src)); + ins_cost(80); + + format %{ "MAX $dst, $src @maxI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, dst, src); + __ masknez(dst, dst, AT); + __ maskeqz(AT, src, AT); + __ OR(dst, dst, AT); + %} + + ins_pipe( pipe_slow ); +%} + +instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ + match(Set dst (MaxI dst zero)); + ins_cost(50); + + format %{ "MAX $dst, 0 @maxI_Reg_zero" %} + + ins_encode %{ + Register dst = $dst$$Register; + + __ slt(AT, dst, R0); + __ masknez(dst, dst, AT); + %} + + ins_pipe( pipe_slow ); +%} + +instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL src mask)); + + format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ bstrpick_d(dst, src, 31, 0); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) +%{ + match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); + + format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + if (src1 == dst) { + __ bstrins_d(dst, src2, 63, 32); + } else if (src2 == dst) { + __ slli_d(dst, dst, 32); + __ bstrins_d(dst, src1, 31, 0); + } else { + __ bstrpick_d(dst, src1, 31, 0); + __ bstrins_d(dst, src2, 63, 32); + } + %} + ins_pipe(ialu_regI_regI); +%} + +// Zero-extend convert int to long +instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L src) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ bstrpick_d(dst, src, 31, 0); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ bstrpick_d(dst, src, 31, 0); + %} + ins_pipe(ialu_regI_regI); +%} + +// Match loading integer and casting it to unsigned int in long register. +// LoadI + ConvI2L + AndL 0xffffffff. 
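+// In Java source this is the usual unsigned-int widening idiom (illustrative
+// example; variable names are not significant):
+//   int  x = a[i];
+//   long u = x & 0xFFFFFFFFL;   // ideal graph: AndL (ConvI2L (LoadI mem)) 0xffffffff
+// The two rules below cover both operand orders of the AndL, so the whole pattern
+// collapses into a zero-extending ld_wu load instead of a sign-extending load
+// followed by a separate mask.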
+instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + + format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe(ialu_loadI); +%} + +instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL mask (ConvI2L (LoadI mem)))); + + format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe(ialu_loadI); +%} + + +// ============================================================================ +// Safepoint Instruction +instruct safePoint_poll_reg(mRegP poll) %{ + match(SafePoint poll); + predicate(false); + effect(USE poll); + + ins_cost(125); + format %{ "Safepoint @ [$poll] : poll for GC @ safePoint_poll_reg" %} + + ins_encode %{ + Register poll_reg = $poll$$Register; + + __ block_comment("Safepoint:"); + __ relocate(relocInfo::poll_type); + __ ld_w(AT, poll_reg, 0); + %} + + ins_pipe( ialu_storeI ); +%} + +instruct safePoint_poll() %{ + match(SafePoint); + + ins_cost(105); + format %{ "poll for GC @ safePoint_poll" %} + + ins_encode %{ + __ block_comment("Safepoint:"); + __ li(T4, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_type); + __ ld_w(AT, T4, 0); + %} + + ins_pipe( ialu_storeI ); +%} + +//----------Arithmetic Conversion Instructions--------------------------------- + +instruct roundFloat_nop(regF dst) +%{ + match(Set dst (RoundFloat dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +instruct roundDouble_nop(regD dst) +%{ + match(Set dst (RoundDouble dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +//---------- Zeros Count Instructions ------------------------------------------ +// CountLeadingZerosINode CountTrailingZerosINode +instruct countLeadingZerosI(mRegI dst, mRegI src) %{ + match(Set dst (CountLeadingZerosI src)); + + format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} + ins_encode %{ + __ clz_w($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countLeadingZerosL(mRegI dst, mRegL src) %{ + match(Set dst (CountLeadingZerosL src)); + + format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} + ins_encode %{ + __ clz_d($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosI(mRegI dst, mRegI src) %{ + match(Set dst (CountTrailingZerosI src)); + + format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} + ins_encode %{ + __ ctz_w($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosL(mRegI dst, mRegL src) %{ + match(Set dst (CountTrailingZerosL src)); + + format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} + ins_encode %{ + __ ctz_d($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// ====================VECTOR INSTRUCTIONS===================================== + +// --------------------------------- Load ------------------------------------- + +instruct loadV16(vecX dst, memory mem) %{ + 
predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + format %{ "vload $dst, $mem\t# @loadV16" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORX); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadV32(vecY dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 32); + match(Set dst (LoadVector mem)); + format %{ "xvload $dst, $mem\t# @loadV32" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORY); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- Store ------------------------------------ + +instruct storeV16(memory mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + format %{ "vstore $src, $mem\t# @storeV16" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORX); + %} + ins_pipe( pipe_slow ); +%} + +instruct storeV32(memory mem, vecY src) %{ + predicate(n->as_StoreVector()->memory_size() == 32); + match(Set mem (StoreVector mem src)); + format %{ "xvstore $src, $mem\t# @storeV32" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORY); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------- Replicate ---------------------------------- + +instruct repl16B(vecX dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB src)); + format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} + ins_encode %{ + __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB imm)); + format %{ "vldi $dst, $imm\t# @repl16B_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8S(vecX dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS src)); + format %{ "vreplgr2vr.h $dst, $src\t# @repl8S" %} + ins_encode %{ + __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8S_imm(vecX dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS imm)); + format %{ "vldi $dst, $imm\t# @repl8S_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4I(vecX dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} + ins_encode %{ + __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4I_imm(vecX dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI imm)); + format %{ "vldi $dst, $imm\t# @repl4I_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl2L(vecX dst, mRegL src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} + ins_encode %{ + __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow 
); +%} + +instruct repl2L_imm(vecX dst, immL10 imm) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL imm)); + format %{ "vldi $dst, $imm\t# @repl2L_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4F(vecX dst, regF src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} + ins_encode %{ + __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl2D(vecX dst, regD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} + ins_encode %{ + __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl32B(vecY dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB src)); + format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} + ins_encode %{ + __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB imm)); + format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl16S(vecY dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS src)); + format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} + ins_encode %{ + __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl16S_imm(vecY dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS imm)); + format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8I(vecY dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI src)); + format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} + ins_encode %{ + __ xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8I_imm(vecY dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI imm)); + format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4L(vecY dst, mRegL src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL src)); + format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} + ins_encode %{ + __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4L_imm(vecY dst, immL10 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL imm)); + format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8F(vecY dst, regF src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateF src)); + format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} + ins_encode %{ + __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4D(vecY dst, regD src) %{ + 
predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateD src)); + format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} + ins_encode %{ + __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ADD -------------------------------------- + +instruct add16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} + ins_encode %{ + __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src (ReplicateB imm))); + format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} + ins_encode %{ + __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} + ins_encode %{ + __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src (ReplicateS imm))); + format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} + ins_encode %{ + __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src1 src2)); + format %{ "vadd.w $dst, $src1, src2\t# @add4I" %} + ins_encode %{ + __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src (ReplicateI imm))); + format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} + ins_encode %{ + __ vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src1 src2)); + format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} + ins_encode %{ + __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src (ReplicateL imm))); + format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} + ins_encode %{ + __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVF src1 src2)); + format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} + ins_encode %{ + __ vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVD src1 src2)); + format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} + ins_encode %{ + __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add32B(vecY dst, 
vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AddVB src1 src2)); + format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} + ins_encode %{ + __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AddVB src (ReplicateB imm))); + format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} + ins_encode %{ + __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVS src1 src2)); + format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} + ins_encode %{ + __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVS src (ReplicateS imm))); + format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} + ins_encode %{ + __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVI src1 src2)); + format %{ "xvadd.wu $dst, $src1, $src2\t# @add8I" %} + ins_encode %{ + __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVI src (ReplicateI imm))); + format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} + ins_encode %{ + __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVL src1 src2)); + format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} + ins_encode %{ + __ xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVL src (ReplicateL imm))); + format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} + ins_encode %{ + __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVF src1 src2)); + format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} + ins_encode %{ + __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVD src1 src2)); + format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} + ins_encode %{ + __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- SUB -------------------------------------- + +instruct sub16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} + ins_encode %{ + __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src (ReplicateB imm))); + format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} + ins_encode %{ + __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} + ins_encode %{ + __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src (ReplicateS imm))); + format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} + ins_encode %{ + __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src1 src2)); + format %{ "vsub.w $dst, $src1, src2\t# @sub4I" %} + ins_encode %{ + __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src (ReplicateI imm))); + format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} + ins_encode %{ + __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src1 src2)); + format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} + ins_encode %{ + __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src (ReplicateL imm))); + format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} + ins_encode %{ + __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVF src1 src2)); + format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} + ins_encode %{ + __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVD src1 src2)); + format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} + ins_encode %{ + __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub32B(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (SubVB src1 src2)); + format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} + ins_encode %{ + __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (SubVB src (ReplicateB imm))); + format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} + ins_encode %{ + __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, 
$imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVS src1 src2)); + format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} + ins_encode %{ + __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVS src (ReplicateS imm))); + format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} + ins_encode %{ + __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVI src1 src2)); + format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} + ins_encode %{ + __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVI src (ReplicateI imm))); + format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} + ins_encode %{ + __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVL src1 src2)); + format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} + ins_encode %{ + __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVL src (ReplicateL imm))); + format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} + ins_encode %{ + __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVF src1 src2)); + format %{ "xvfsub.s $dst, $src1, $src2\t# @sub8F" %} + ins_encode %{ + __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVD src1 src2)); + format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} + ins_encode %{ + __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- MUL -------------------------------------- +instruct mul8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVS src1 src2)); + format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} + ins_encode %{ + __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVI src1 src2)); + format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} + ins_encode %{ + __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVF src1 src2)); + format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} + ins_encode %{ + __ 
vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVD src1 src2)); + format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} + ins_encode %{ + __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (MulVS src1 src2)); + format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} + ins_encode %{ + __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVI src1 src2)); + format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} + ins_encode %{ + __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVF src1 src2)); + format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} + ins_encode %{ + __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVD src1 src2)); + format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} + ins_encode %{ + __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- DIV -------------------------------------- +instruct div4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVF src1 src2)); + format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} + ins_encode %{ + __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct div2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVD src1 src2)); + format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} + ins_encode %{ + __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct div8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (DivVF src1 src2)); + format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} + ins_encode %{ + __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct div4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVD src1 src2)); + format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} + ins_encode %{ + __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------ Shift --------------------------------------- + +instruct shiftcntX(vecX dst, mRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} + ins_encode %{ + __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct shiftcntY(vecY dst, mRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 
32); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} + ins_encode %{ + __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------ LeftShift ----------------------------------- + +instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} + ins_encode %{ + __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} + ins_encode %{ + __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} + ins_encode %{ + __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + format %{ "vslli.w $dst, $src, $shift\t# @sll4I_imm" %} + ins_encode %{ + __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} + ins_encode %{ + __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} + ins_encode %{ + __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll32B(vecY dst, vecY src, vecY 
shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (LShiftVB src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} + ins_encode %{ + __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (LShiftVB src shift)); + format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} + ins_encode %{ + __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8I(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVI src shift)); + format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} + ins_encode %{ + __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVI src shift)); + format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} + ins_encode %{ + __ xvslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4L(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVL src shift)); + format %{ "xvsll.d $dst, $src, $shift\t# @sll4L" %} + ins_encode %{ + __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVL src shift)); + format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} + ins_encode %{ + __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// ----------------------- LogicalRightShift ---------------------------------- + +instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVB src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} + ins_encode %{ + __ vsrl_b($tmp$$FloatRegister, 
$src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVB src shift)); + format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} + ins_encode %{ + __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} + ins_encode %{ + __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} + ins_encode %{ + __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} + ins_encode %{ + __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} + ins_encode %{ + __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (URShiftVB src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} + ins_encode %{ + __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (URShiftVB src 
shift)); + format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} + ins_encode %{ + __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8I(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVI src shift)); + format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} + ins_encode %{ + __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVI src shift)); + format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} + ins_encode %{ + __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4L(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVL src shift)); + format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} + ins_encode %{ + __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4L_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVL src shift)); + format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} + ins_encode %{ + __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------- ArithmeticRightShift ----------------------------- + +instruct sra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + effect(TEMP tmp); + format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} + ins_encode %{ + __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); + __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); + __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); + } else { + __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + 
%} + ins_pipe( pipe_slow ); +%} + +instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + effect(TEMP tmp); + format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} + ins_encode %{ + __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); + __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); + __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); + } else { + __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} + ins_encode %{ + __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} + ins_encode %{ + __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} + ins_encode %{ + __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} + ins_encode %{ + __ vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (RShiftVB src shift)); + effect(TEMP tmp); + format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} + ins_encode %{ + __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); + __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); + __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (RShiftVB src shift)); + format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); + } else { + __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + effect(TEMP tmp); + format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} + ins_encode %{ + __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); + __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, 
$tmp$$FloatRegister); + __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra16S_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); + } else { + __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sra8I(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVI src shift)); + format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} + ins_encode %{ + __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVI src shift)); + format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} + ins_encode %{ + __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4L(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVL src shift)); + format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} + ins_encode %{ + __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVL src shift)); + format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} + ins_encode %{ + __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- AND -------------------------------------- + +instruct andV16(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src1 src2)); + format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} + ins_encode %{ + __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AndV src (ReplicateB imm))); + format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} + ins_encode %{ + __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct andV32(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (AndV src1 src2)); + format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} + ins_encode %{ + __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AndV src (ReplicateB imm))); + format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} + ins_encode %{ + __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- OR --------------------------------------- + +instruct orV16(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src1 src2)); + format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} + ins_encode %{ + __ 
vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (OrV src (ReplicateB imm))); + format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} + ins_encode %{ + __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct orV32(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (OrV src1 src2)); + format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} + ins_encode %{ + __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (OrV src (ReplicateB imm))); + format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} + ins_encode %{ + __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- XOR -------------------------------------- + +instruct xorV16(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV src1 src2)); + format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} + ins_encode %{ + __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (XorV src (ReplicateB imm))); + format %{ "vxori.b $dst, $src, $imm\t# @xor16B_imm" %} + ins_encode %{ + __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct xorV32(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (XorV src1 src2)); + format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} + ins_encode %{ + __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct xor32B_imm(vecY dst, vecY src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (XorV src (ReplicateB imm))); + format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} + ins_encode %{ + __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- NOR -------------------------------------- + +instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); + format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} + ins_encode %{ + __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); + format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} + ins_encode %{ + __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (XorV (OrV src1 src2) 
(ReplicateB m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); + format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} + ins_encode %{ + __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); + format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} + ins_encode %{ + __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ANDN ------------------------------------- + +instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); + format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} + ins_encode %{ + __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); + format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} + ins_encode %{ + __ xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ORN -------------------------------------- + +instruct ornV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); + format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} + ins_encode %{ + __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); + format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} + ins_encode %{ + __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +// +// peepmatch ( root_instr_name [preceeding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] 
); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. +// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == EAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(eRegI dst, eRegI src) %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceeded by register-register move +// peepmatch ( incI_eReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// Implementation no longer uses movX instructions since +// machine-independent system no longer uses CopyX nodes. +// +// peephole %{ +// peepmatch ( incI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( decI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addI_eReg_imm movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addP_eReg_imm movP ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); +// %} + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, eRegI src) %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(eRegI dst, memory mem) %{ +// match(Set dst (LoadI mem)); +// %} +// +//peephole %{ +// peepmatch ( loadI storeI ); +// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); +// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); +//%} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. + diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp new file mode 100644 index 00000000000..89295343ce0 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp @@ -0,0 +1,3895 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#ifdef COMPILER2 +#include "opto/compile.hpp" +#include "opto/node.hpp" +#endif + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Implementation of MacroAssembler + +intptr_t MacroAssembler::i[32] = {0}; +float MacroAssembler::f[32] = {0.0}; + +void MacroAssembler::print(outputStream *s) { + unsigned int k; + for(k=0; k<32; k++) { + s->print_cr("i%d = 0x%.16lx", k, i[k]); + } + s->cr(); + + for(k=0; k<32; k++) { + s->print_cr("f%d = %f", k, f[k]); + } + s->cr(); +} + +int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } +int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } + +void MacroAssembler::save_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ st_w (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ fst_s (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + +void MacroAssembler::restore_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ ld_w (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ fld_s (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + + +void MacroAssembler::pd_patch_instruction(address branch, address target) { + jint& stub_inst = *(jint*)branch; + jint* pc = (jint*)branch; + + if (high(stub_inst, 7) == pcaddu18i_op) { + // far: + // pcaddu18i reg, 
si20 + // jirl r0, reg, si18 + + assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); + jlong offs = target - branch; + CodeBuffer cb(branch, 2 * BytesPerInstWord); + MacroAssembler masm(&cb); + if (reachable_from_branch_short(offs)) { + // convert far to short +#define __ masm. + __ b(target); + __ nop(); +#undef __ + } else { + masm.patchable_jump_far(R0, offs); + } + return; + } else if (high(stub_inst, 7) == pcaddi_op) { + // see MacroAssembler::set_last_Java_frame: + // pcaddi reg, si20 + + jint offs = (target - branch) >> 2; + guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); + CodeBuffer cb(branch, 1 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.pcaddi(as_Register(low(stub_inst, 5)), offs); + return; + } + + stub_inst = patched_branch(target - branch, stub_inst, 0); +} + +bool MacroAssembler::reachable_from_branch_short(jlong offs) { + if (ForceUnreachable) { + return false; + } + return is_simm(offs >> 2, 26); +} + +void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { + jint si18, si20; + guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); + split_simm38(offs, si18, si20); + pcaddu18i(T4, si20); + jirl(ra, T4, si18); +} + +void MacroAssembler::patchable_jump(address target, bool force_patchable) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(target) != NULL, + "destination of jump not found in code cache"); + if (force_patchable || patchable_branches()) { + jlong offs = target - pc(); + if (reachable_from_branch_short(offs)) { // Short jump + b(offset26(target)); + nop(); + } else { // Far jump + patchable_jump_far(R0, offs); + } + } else { // Real short jump + b(offset26(target)); + } +} + +void MacroAssembler::patchable_call(address target, address call_site) { + jlong offs = target - (call_site ? call_site : pc()); + if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call + nop(); + bl((offs - BytesPerInstWord) >> 2); + } else { // Far call + patchable_jump_far(RA, offs); + } +} + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. + +address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type + || entry.rspec().type() == relocInfo::opt_virtual_call_type + || entry.rspec().type() == relocInfo::static_call_type + || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + + // We need a trampoline if branches are far. + if (far_branches()) { + bool in_scratch_emit_size = false; +#ifdef COMPILER2 + // We don't want to emit a trampoline if C2 is generating dummy + // code during its branch shortening phase. + CompileTask* task = ciEnv::current()->task(); + in_scratch_emit_size = + (task != NULL && is_c2_compile(task->comp_level()) && + Compile::current()->in_scratch_emit_size()); +#endif + if (!in_scratch_emit_size) { + address stub = emit_trampoline_stub(offset(), entry.target()); + if (stub == NULL) { + return NULL; // CodeCache is full + } + } + } + + if (cbuf) cbuf->set_insts_mark(); + relocate(entry.rspec()); + if (!far_branches()) { + bl(entry.target()); + } else { + bl(pc()); + } + // just need to return a non-null address + return pc(); +} + +// Emit a trampoline stub for a call to a target which is too far away. 
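// A trampoline is needed at all because of the branch ranges used above: b/bl
// encode a signed 26-bit word offset (roughly +/-128 MiB), while the patchable
// far form emitted by patchable_jump_far() (pcaddu18i + jirl) covers a signed
// 38-bit byte offset. Below is a minimal standalone sketch of the range check
// mirrored by reachable_from_branch_short() above, ignoring the ForceUnreachable
// flag; fits_short_branch is a hypothetical helper name, not part of this patch.

#include <cstdint>

inline bool fits_short_branch(int64_t byte_offs) {
  const int64_t word_offs = byte_offs >> 2;                     // instructions are 4 bytes
  return word_offs >= -(1LL << 25) && word_offs < (1LL << 25);  // signed 26-bit immediate
}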
+// +// code sequences: +// +// call-site: +// branch-and-link to <destination> or <trampoline stub> +// +// Related trampoline stub for this call site in the stub section: +// load the call target from the constant pool +// branch (RA still points to the call site above) + +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { + // Start the stub + address stub = start_a_stub(NativeInstruction::nop_instruction_size + + NativeCallTrampolineStub::instruction_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } + + // Create a trampoline stub relocation which relates this trampoline stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section. + align(wordSize); + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); + const int stub_start_offset = offset(); + + // Now, create the trampoline stub's code: + // - load the call + // - call + pcaddi(T4, 0); + ld_d(T4, T4, 16); + jr(T4); + nop(); //align + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); + emit_int64((int64_t)dest); + + const address stub_start_addr = addr_at(stub_start_offset); + + NativeInstruction* ni = nativeInstruction_at(stub_start_addr); + assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); + + end_a_stub(); + return stub_start_addr; +} + +void MacroAssembler::beq_far(Register rs, Register rt, address entry) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + beq(rs, rt, offset16(entry)); + } else { // Far jump + Label not_jump; + bne(rs, rt, not_jump); + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + beq_far(rs, rt, target(L)); + } else { + Label not_jump; + bne(rs, rt, not_jump); + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, address entry) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + bne(rs, rt, offset16(entry)); + } else { // Far jump + Label not_jump; + beq(rs, rt, not_jump); + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + bne_far(rs, rt, target(L)); + } else { + Label not_jump; + beq(rs, rt, not_jump); + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + if (is_signed) { + blt(rs, rt, offset16(entry)); + } else { + bltu(rs, rt, offset16(entry)); + } + } else { // Far jump + Label not_jump; + if (is_signed) { + bge(rs, rt, not_jump); + } else { + bgeu(rs, rt, not_jump); + } + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::blt_far(Register rs, Register rt, Label& L, bool is_signed) { + if (L.is_bound()) { + blt_far(rs, rt, target(L), is_signed); + } else { + Label not_jump; + if (is_signed) { + bge(rs, rt, not_jump); + } else { + bgeu(rs, rt, not_jump); + } + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + if (is_signed) { + bge(rs, rt, offset16(entry)); + } else { + bgeu(rs, rt, offset16(entry)); + } + } else { // Far jump + Label not_jump; + if (is_signed) { + blt(rs, rt, not_jump); + } else { + bltu(rs, rt, not_jump); + } + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::bge_far(Register 
rs, Register rt, Label& L, bool is_signed) { + if (L.is_bound()) { + bge_far(rs, rt, target(L), is_signed); + } else { + Label not_jump; + if (is_signed) { + blt(rs, rt, not_jump); + } else { + bltu(rs, rt, not_jump); + } + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { + Label not_taken; + bne(rs, rt, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { + Label not_taken; + beq(rs, rt, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { + Label not_taken; + if (is_signed) { + bge(rs, rt, not_taken); + } else { + bgeu(rs, rt, not_taken); + } + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { + Label not_taken; + if (is_signed) { + blt(rs, rt, not_taken); + } else { + bltu(rs, rt, not_taken); + } + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bc1t_long(Label& L) { + Label not_taken; + bceqz(FCC0, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bc1f_long(Label& L) { + Label not_taken; + bcnez(FCC0, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::b_far(Label& L) { + if (L.is_bound()) { + b_far(target(L)); + } else { + L.add_patch_at(code(), locator()); + if (ForceUnreachable) { + patchable_jump_far(R0, 0); + } else { + b(0); + } + } +} + +void MacroAssembler::b_far(address entry) { + jlong offs = entry - pc(); + if (reachable_from_branch_short(offs)) { // Short jump + b(offset26(entry)); + } else { // Far jump + patchable_jump_far(R0, offs); + } +} + +void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { + ldx_d(rt, base, offset); +} + +void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { + stx_d(rt, base, offset); +} + +void MacroAssembler::ld_long(Register rt, Register offset, Register base) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +#if 0 + add_d(AT, base, offset); + ld_long(rt, 0, AT); +#endif +} + +void MacroAssembler::st_long(Register rt, Register offset, Register base) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +#if 0 + add_d(AT, base, offset); + st_long(rt, 0, AT); +#endif +} + +Address MacroAssembler::as_Address(AddressLiteral adr) { + return Address(adr.target(), adr.rspec()); +} + +Address MacroAssembler::as_Address(ArrayAddress adr) { + return Address::make_array(adr); +} + +// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). 
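// The *_far and *_long conditional branches above all share one shape: emit the
// inverted condition as a short-range branch that skips over an unconditional
// far jump. A scalar sketch of bne_long(rs, rt, L) under that reading;
// bne_long_model and the *_pc parameters are hypothetical stand-ins for the
// label targets, not part of this patch.

#include <cstdint>

inline uint64_t bne_long_model(int64_t rs, int64_t rt,
                               uint64_t taken_pc, uint64_t fallthrough_pc) {
  if (rs == rt) {            // beq(rs, rt, not_taken) -- inverted condition
    return fallthrough_pc;   // not_taken: fall through
  }
  return taken_pc;           // jmp_far(L)
}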
+void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { + li(tmp_reg1, inc); + li(tmp_reg2, counter_addr); + amadd_w(R0, tmp_reg1, tmp_reg2); +} + +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + bool need_tmp_reg = false; + if (tmp_reg == noreg) { + need_tmp_reg = true; + tmp_reg = T4; + } + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + ld_ptr(swap_reg, mark_addr); + } + + if (need_tmp_reg) { + push(tmp_reg); + } + move(tmp_reg, swap_reg); + andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); + addi_d(AT, R0, markOopDesc::biased_lock_pattern); + sub_d(AT, AT, tmp_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + + bne(AT, R0, cas_label); + + + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + // Note that because there is no current thread register on LA we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and + // simpler. + st_ptr(swap_reg, saved_mark_addr); + if (need_tmp_reg) { + push(tmp_reg); + } + if (swap_reg_contains_mark) { + null_check_offset = offset(); + } + load_prototype_header(tmp_reg, obj_reg); + xorr(tmp_reg, tmp_reg, swap_reg); + get_thread(swap_reg); + xorr(swap_reg, swap_reg, tmp_reg); + + li(AT, ~((int) markOopDesc::age_mask_in_place)); + andr(swap_reg, swap_reg, AT); + + if (PrintBiasedLockingStatistics) { + Label L; + bne(swap_reg, R0, L); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (need_tmp_reg) { + pop(tmp_reg); + } + beq(swap_reg, R0, done); + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + + li(AT, markOopDesc::biased_lock_mask_in_place); + andr(AT, swap_reg, AT); + bne(AT, R0, try_revoke_bias); + // Biasing is still enabled for this data type. 
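// As a recap of the test emitted a few instructions earlier: XOR-ing the
// prototype header, the mark word and the current thread cancels the fields
// that already match, and masking off the age bits leaves zero exactly when
// the object is still biased to this thread in the current epoch. A standalone
// sketch of that predicate; still_biased_to_self is a hypothetical helper
// name, not part of this patch.

#include <cstdint>

inline bool still_biased_to_self(uintptr_t mark, uintptr_t prototype_header,
                                 uintptr_t thread, uintptr_t age_mask_in_place) {
  // mirrors the load_prototype_header / xorr / get_thread / xorr / andr sequence above
  return (((prototype_header ^ mark) ^ thread) & ~age_mask_in_place) == 0;
}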
See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + + li(AT, markOopDesc::epoch_mask_in_place); + andr(AT,swap_reg, AT); + bne(AT, R0, try_rebias); + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + + ld_ptr(swap_reg, saved_mark_addr); + + li(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + andr(swap_reg, swap_reg, AT); + + if (need_tmp_reg) { + push(tmp_reg); + } + get_thread(tmp_reg); + orr(tmp_reg, tmp_reg, swap_reg); + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + } + b(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + get_thread(swap_reg); + orr(tmp_reg, tmp_reg, swap_reg); + ld_ptr(swap_reg, saved_mark_addr); + + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + } + + b(done); + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. 
We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + ld_ptr(swap_reg, saved_mark_addr); + + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + + bind(cas_label); + return null_check_offset; +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); + addi_d(AT, R0, markOopDesc::biased_lock_pattern); + + beq(AT, temp_reg, done); +} + +// the stack pointer adjustment is needed. 
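// The bias checks above and in biased_locking_exit() reduce to a mask-and-compare
// on the low lock bits of the mark word. A standalone sketch of that test; the
// constants follow HotSpot's markOop layout (three low lock bits, biased
// pattern 0b101), and mark_word_is_biased is a hypothetical helper name, not
// part of this patch.

#include <cstdint>

inline bool mark_word_is_biased(uintptr_t mark) {
  const uintptr_t biased_lock_mask_in_place = 0x7;  // andi(temp_reg, temp_reg, mask)
  const uintptr_t biased_lock_pattern       = 0x5;  // addi_d(AT, R0, pattern)
  return (mark & biased_lock_mask_in_place) == biased_lock_pattern;  // beq(AT, temp_reg, done)
}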
see InterpreterMacroAssembler::super_call_VM_leaf +// this method will handle the stack problem, you need not to preserve the stack space for the argument now +void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { + Label L, E; + + assert(number_of_arguments <= 4, "just check"); + + andi(AT, SP, 0xf); + beq(AT, R0, L); + addi_d(SP, SP, -8); + call(entry_point, relocInfo::runtime_call_type); + addi_d(SP, SP, 8); + b(E); + + bind(L); + call(entry_point, relocInfo::runtime_call_type); + bind(E); +} + + +void MacroAssembler::jmp(address entry) { + jlong offs = entry - pc(); + if (reachable_from_branch_short(offs)) { // Short jump + b(offset26(entry)); + } else { // Far jump + patchable_jump_far(R0, offs); + } +} + +void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::none: + jmp(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + patchable_jump(entry); + } + break; + } +} + +void MacroAssembler::jmp_far(Label& L) { + if (L.is_bound()) { + assert(target(L) != NULL, "jmp most probably wrong"); + patchable_jump(target(L), true /* force patchable */); + } else { + L.add_patch_at(code(), locator()); + patchable_jump_far(R0, 0); + } +} + +void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_li52(AT, (long)obj); + st_d(AT, dst); +} + +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_li52(dst, (long)obj); +} + +void MacroAssembler::call(address entry) { + jlong offs = entry - pc(); + if (reachable_from_branch_short(offs)) { // Short call (pc-rel) + bl(offset26(entry)); + } else if (is_simm(offs, 38)) { // Far call (pc-rel) + patchable_jump_far(RA, offs); + } else { // Long call (absolute) + call_long(entry); + } +} + +void MacroAssembler::call(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::none: + call(entry); + break; + case relocInfo::runtime_call_type: + if (!is_simm(entry - pc(), 38)) { + call_long(entry); + break; + } + // fallthrough + default: + { + InstructionMark im(this); + relocate(rtype); + patchable_call(entry); + } + break; + } +} + +void MacroAssembler::call(address entry, RelocationHolder& rh) { + switch (rh.type()) { + case relocInfo::none: + call(entry); + break; + case relocInfo::runtime_call_type: + if (!is_simm(entry - pc(), 38)) { + call_long(entry); + break; + } + // fallthrough + default: + { + InstructionMark im(this); + relocate(rh); + patchable_call(entry); + } + break; + } +} + +void MacroAssembler::call_long(address entry) { + jlong value = (jlong)entry; + lu12i_w(T4, split_low20(value >> 12)); + lu32i_d(T4, split_low20(value >> 32)); + jirl(RA, T4, split_low12(value)); +} + +address MacroAssembler::ic_call(address entry) { + RelocationHolder rh = virtual_call_Relocation::spec(pc()); + patchable_li52(IC_Klass, (long)Universe::non_oop_word()); + assert(entry != NULL, "call most probably wrong"); + InstructionMark im(this); + return trampoline_call(AddressLiteral(entry, rh)); +} + +void MacroAssembler::c2bool(Register r) { + sltu(r, R0, r); +} + +#ifndef PRODUCT +extern "C" void 
findpc(intptr_t x); +#endif + +void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { + if ( ShowMessageBoxOnError ) { + JavaThreadState saved_state = JavaThread::current()->thread_state(); + JavaThread::current()->set_thread_state(_thread_in_vm); + { + // In order to get locks work, we need to fake a in_VM state + ttyLocker ttyl; + ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } + + } + ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); + } + else + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); +} + + +void MacroAssembler::stop(const char* msg) { + li(A0, (long)msg); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + brk(17); +} + +void MacroAssembler::warn(const char* msg) { + pushad(); + li(A0, (long)msg); + push(S2); + li(AT, -(StackAlignmentInBytes)); + move(S2, SP); // use S2 as a sender SP holder + andr(SP, SP, AT); // align stack as required by ABI + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + move(SP, S2); // use S2 as a sender SP holder + pop(S2); + popad(); +} + +void MacroAssembler::increment(Register reg, int imm) { + if (!imm) return; + if (is_simm(imm, 12)) { + addi_d(reg, reg, imm); + } else { + li(AT, imm); + add_d(reg, reg, AT); + } +} + +void MacroAssembler::decrement(Register reg, int imm) { + increment(reg, -imm); +} + +void MacroAssembler::increment(Address addr, int imm) { + if (!imm) return; + assert(is_simm(imm, 12), "must be"); + ld_ptr(AT, addr); + addi_d(AT, AT, imm); + st_ptr(AT, addr); +} + +void MacroAssembler::decrement(Address addr, int imm) { + increment(addr, -imm); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) move(A2, arg_2); + assert(arg_2 != A1, "smashed argument"); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register 
arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + + assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); + + // set last Java frame before call + Label before_call; + bind(before_call); + set_last_Java_frame(java_thread, last_java_sp, FP, before_call); + + // do the call + move(A0, java_thread); + call(entry_point, relocInfo::runtime_call_type); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. 
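A minimal host-side sketch (not from the patch) of the 16-byte stack-alignment handling used by call_VM_leaf_base() earlier and call_VM_helper() below, assuming StackAlignmentInBytes is 16 as the 0xf mask implies: test the low bits of SP, or round SP down with a negative mask.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t sp = 0x7ffffff128ULL;            // fake stack pointer value
  bool aligned = (sp & 0xf) == 0;           // andi(AT, SP, 0xf); beq(AT, R0, L)
  uint64_t rounded = sp & ~(uint64_t)0xf;   // andr(SP, SP, -StackAlignmentInBytes)
  assert((rounded & 0xf) == 0 && rounded <= sp);
  return aligned ? 0 : 1;
}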
+#ifndef OPT_THREAD + get_thread(java_thread); +#else +#ifdef ASSERT + { + Label L; + get_thread(AT); + beq(java_thread, AT, L); + stop("MacroAssembler::call_VM_base: TREG not callee saved?"); + bind(L); + } +#endif +#endif + + // discard thread and arguments + ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // reset last Java frame + reset_last_Java_frame(java_thread, false); + + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + Label L; + ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + beq(AT, R0, L); + li(AT, target(before_call)); + push(AT); + jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + bind(L); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); + st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); + verify_oop(oop_result); + } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + move(V0, SP); + //we also reserve space for java_thread here + li(AT, -(StackAlignmentInBytes)); + andr(SP, SP, AT); + call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + if (arg_0 != A0) move(A0, arg_0); + call_VM_leaf(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + call_VM_leaf(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); + call_VM_leaf(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 0); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1) { + if (arg_1 != A0) move(A0, arg_1); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2, + Register arg_3) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { +} + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { +} + +void MacroAssembler::null_check(Register reg, int offset) { + if (needs_explicit_null_check(offset)) { + // provoke OS NULL 
exception if reg = NULL by + // accessing M[reg] w/o changing any (non-CC) registers + // NOTE: cmpl is plenty here to provoke a segv + ld_w(AT, reg, 0); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +void MacroAssembler::enter() { + push2(RA, FP); + move(FP, SP); +} + +void MacroAssembler::leave() { + move(SP, FP); + pop2(RA, FP); +} + +void MacroAssembler::build_frame(int framesize) { + assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); + assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + if (Assembler::is_simm(-framesize, 12)) { + addi_d(SP, SP, -framesize); + st_ptr(FP, Address(SP, framesize - 2 * wordSize)); + st_ptr(RA, Address(SP, framesize - 1 * wordSize)); + if (PreserveFramePointer) + addi_d(FP, SP, framesize - 2 * wordSize); + } else { + addi_d(SP, SP, -2 * wordSize); + st_ptr(FP, Address(SP, 0 * wordSize)); + st_ptr(RA, Address(SP, 1 * wordSize)); + if (PreserveFramePointer) + move(FP, SP); + li(SCR1, framesize - 2 * wordSize); + sub_d(SP, SP, SCR1); + } +} + +void MacroAssembler::remove_frame(int framesize) { + assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + if (Assembler::is_simm(framesize, 12)) { + ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); + ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); + addi_d(SP, SP, framesize); + } else { + li(SCR1, framesize - 2 * wordSize); + add_d(SP, SP, SCR1); + ld_ptr(FP, Address(SP, 0 * wordSize)); + ld_ptr(RA, Address(SP, 1 * wordSize)); + addi_d(SP, SP, 2 * wordSize); + } +} + +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T1; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // we must set sp to zero to clear frame + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is possible + // that we need it only for debugging + if(clear_fp) { + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); +} + +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + // we must set sp to zero to clear frame + st_d(R0, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + st_d(R0, thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + st_d(R0, thread, in_bytes(JavaThread::last_Java_pc_offset())); +} + +// Write serialization page so VM thread can do a pseudo remote membar. +// We use the current thread pointer to calculate a thread specific +// offset to write to within the page. This minimizes bus traffic +// due to cache line collision. 
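A rough, self-contained sketch of the per-thread offset that serialize_memory() below derives from the thread pointer; the page size and serialize-page shift count here are assumed example values (the real ones come from os::vm_page_size() and os::get_serialize_page_shift_count()), and the times_2 scaling mirrors the alsl_d in the emitted code.

#include <cstdint>
#include <cstdio>

int main() {
  const int log2_page = 12;                 // assume 4 KiB pages
  const int sps       = 4;                  // assumed serialize-page shift count
  const int lsb = sps + 2;
  const int msb = sps + log2_page - 1;
  uint64_t thread = 0x7f0012345600ULL;      // fake JavaThread* value
  // bstrpick_w extracts bits [lsb, msb]; alsl_d(..., times_2 - 1) scales the field by 2,
  // which keeps the resulting offset inside the serialization page.
  uint64_t field  = (thread >> lsb) & ((1ULL << (msb - lsb + 1)) - 1);
  uint64_t offset = field << 1;
  printf("byte offset into the serialization page: %#llx\n", (unsigned long long)offset);
  return 0;
}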
+void MacroAssembler::serialize_memory(Register thread, Register tmp) { + assert_different_registers(AT, tmp); + juint sps = os::get_serialize_page_shift_count(); + juint lsb = sps + 2; + juint msb = sps + log2_uint(os::vm_page_size()) - 1; + bstrpick_w(AT, thread, msb, lsb); + li(tmp, os::get_memory_serialize_page()); + alsl_d(tmp, AT, tmp, Address::times_2 - 1); + st_w(R0, tmp, 0); +} + +// Calls to C land +// +// When entering C land, the fp, & sp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. +void MacroAssembler::set_last_Java_frame(Register java_thread, + Register last_java_sp, + Register last_java_fp, + Label& last_java_pc) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // last_java_pc + lipc(AT, last_java_pc); + st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset())); + + st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label& last_java_pc) { + set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); +} + +////////////////////////////////////////////////////////////////////////////////// +#if INCLUDE_ALL_GCS + +void MacroAssembler::g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + + assert(thread == TREG, "must be"); + + Label done; + Label runtime; + + assert(pre_val != noreg, "check this code"); + + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + assert(pre_val != V0, "check this code"); + } + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + // Is marking active? + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + ld_w(AT, in_progress); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + ld_b(AT, in_progress); + } + beqz(AT, done); + + // Do we need to load the previous value? + if (obj != noreg) { + load_heap_oop(pre_val, Address(obj, 0)); + } + + // Is the previous value null? + beqz(pre_val, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) 
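The buffer test that follows can be read as the C++ sketch below; SATBQueue and try_enqueue are simplified stand-ins for the VM's PtrQueue layout, not actual HotSpot types.

#include <cstddef>

struct SATBQueue {           // simplified stand-in for the thread's SATB PtrQueue
  size_t index;              // byte index of the next free slot (0 == buffer full)
  void** buf;
};

// Returns false when the buffer is full and the runtime slow path must run.
static bool try_enqueue(SATBQueue* q, void* pre_val) {
  if (q->index == 0) return false;                    // beqz(tmp, runtime)
  q->index -= sizeof(void*);                          // addi_d(tmp, tmp, -wordSize)
  *(void**)((char*)q->buf + q->index) = pre_val;      // stx_d(pre_val, tmp, AT)
  return true;
}

int main() {
  void* slots[4] = {};
  SATBQueue q = { sizeof(slots), slots };
  int dummy;
  return try_enqueue(&q, &dummy) ? 0 : 1;
}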
+ + ld_d(tmp, index); + beqz(tmp, runtime); + + addi_d(tmp, tmp, -1 * wordSize); + st_d(tmp, index); + ld_d(AT, buffer); + + // Record the previous value + stx_d(pre_val, tmp, AT); + b(done); + + bind(runtime); + // save the live input values + if (tosca_live) push(V0); + + if (obj != noreg && obj != V0) push(obj); + + if (pre_val != V0) push(pre_val); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then fp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + if (expand_call) { + assert(pre_val != A1, "smashed arg"); + if (thread != A1) move(A1, thread); + if (pre_val != A0) move(A0, pre_val); + MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); + } else { + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); + } + + // save the live input values + if (pre_val != V0) + pop(pre_val); + + if (obj != noreg && obj != V0) + pop(obj); + + if(tosca_live) pop(V0); + + bind(done); +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + assert(tmp != AT, "must be"); + assert(tmp2 != AT, "must be"); + assert(thread == TREG, "must be"); + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + Label done; + Label runtime; + + // Does store cross heap regions? + xorr(AT, store_addr, new_val); + srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); + beqz(AT, done); + + + // crosses regions, storing NULL? + beq(new_val, R0, done); + + // storing region crossing non-NULL, is card already dirty? + const Register card_addr = tmp; + const Register cardtable = tmp2; + + move(card_addr, store_addr); + srli_d(card_addr, card_addr, CardTableModRefBS::card_shift); + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. + li(cardtable, (intptr_t)ct->byte_map_base); + add_d(card_addr, card_addr, cardtable); + + ld_b(AT, card_addr, 0); + addi_d(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val()); + beqz(AT, done); + + membar(StoreLoad); + ld_b(AT, card_addr, 0); + addi_d(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val()); + beqz(AT, done); + + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. 
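A compact sketch of the filtering g1_write_barrier_post() performs before dirtying a card; the types and the constants in main() are illustrative stand-ins, but the region-cross, NULL and card-value tests mirror the emitted code above.

#include <cstdint>

// Returns true only when the store really needs the card dirtied and logged.
static bool needs_card_mark(uintptr_t store_addr, uintptr_t new_val,
                            int log_region_bytes,   // HeapRegion::LogOfHRGrainBytes
                            int card_shift,         // CardTableModRefBS::card_shift
                            const int8_t* card_table_base,
                            int8_t young_card, int8_t dirty_card) {
  if (((store_addr ^ new_val) >> log_region_bytes) == 0) return false; // same region
  if (new_val == 0) return false;                                      // storing NULL
  const int8_t* card = card_table_base + (store_addr >> card_shift);
  if (*card == young_card) return false;   // young cards never need dirtying
  // the generated code re-reads the card after a StoreLoad fence at this point
  if (*card == dirty_card) return false;   // already dirty
  return true;
}

int main() {
  int8_t cards[1] = { 0 };
  // same-region store: filtered out before the card table is even consulted
  return needs_card_mark(0x1000, 0x1008, 19, 9, cards, 2, 1) ? 1 : 0;
}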
+ li(AT, (int)CardTableModRefBS::dirty_card_val()); + st_b(AT, card_addr, 0); + + ld_w(AT, queue_index); + beqz(AT, runtime); + addi_d(AT, AT, -1 * wordSize); + st_w(AT, queue_index); + ld_d(tmp2, buffer); + ld_d(AT, queue_index); + stx_d(card_addr, tmp2, AT); + b(done); + + bind(runtime); + // save the live input values + push(store_addr); + push(new_val); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG); + pop(new_val); + pop(store_addr); + + bind(done); +} + +#endif // INCLUDE_ALL_GCS +////////////////////////////////////////////////////////////////////////////////// + + +void MacroAssembler::store_check(Register obj) { + // Does a store check for the oop in register obj. The content of + // register obj is destroyed afterwards. + store_check_part_1(obj); + store_check_part_2(obj); +} + +void MacroAssembler::store_check(Register obj, Address dst) { + store_check(obj); +} + + +// split the store check operation so that other instructions can be scheduled inbetween +void MacroAssembler::store_check_part_1(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + srli_d(obj, obj, CardTableModRefBS::card_shift); +} + +void MacroAssembler::store_check_part_2(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + li(AT, (long)ct->byte_map_base); + add_d(AT, AT, obj); + if (UseConcMarkSweepGC) membar(StoreStore); + st_b(R0, AT, 0); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. +void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, + Register t1, Register t2, Label& slow_case) { + assert_different_registers(obj, t2); + assert_different_registers(obj, var_size_in_bytes); + + Register end = t2; + // verify_tlab(); + + ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset())); + if (var_size_in_bytes == noreg) { + lea(end, Address(obj, con_size_in_bytes)); + } else { + lea(end, Address(obj, var_size_in_bytes, Address::times_1, 0)); + } + + ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset())); + blt_far(SCR1, end, slow_case, false); + + // update the tlab top pointer + st_ptr(end, Address(TREG, JavaThread::tlab_top_offset())); + + // recover var_size_in_bytes if necessary + if (var_size_in_bytes == end) { + sub_d(var_size_in_bytes, var_size_in_bytes, obj); + } + // verify_tlab(); +} + +// Defines obj, preserves var_size_in_bytes +void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, + Register t1, Label& slow_case) { + assert_different_registers(obj, var_size_in_bytes, t1, AT); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. 
+ b_far(slow_case); + } else { + Register end = t1; + Register heap_end = SCR2; + Label retry; + bind(retry); + + li(SCR1, (address)Universe::heap()->end_addr()); + ld_d(heap_end, SCR1, 0); + + // Get the current top of the heap + li(SCR1, (address) Universe::heap()->top_addr()); + ll_d(obj, SCR1, 0); + + // Adjust it my the size of our new object + if (var_size_in_bytes == noreg) + addi_d(end, obj, con_size_in_bytes); + else + add_d(end, obj, var_size_in_bytes); + + // if end < obj then we wrapped around high memory + blt_far(end, obj, slow_case, false); + blt_far(heap_end, end, slow_case, false); + + // If heap top hasn't been changed by some other thread, update it. + sc_d(end, SCR1, 0); + beqz(end, retry); + + incr_allocated_bytes(TREG, var_size_in_bytes, con_size_in_bytes, t1); + } +} + +void MacroAssembler::incr_allocated_bytes(Register thread, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1) { + if (!thread->is_valid()) { +#ifndef OPT_THREAD + assert(t1->is_valid(), "need temp reg"); + thread = t1; + get_thread(thread); +#else + thread = TREG; +#endif + } + + ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); + if (var_size_in_bytes->is_valid()) { + add_d(AT, AT, var_size_in_bytes); + } else { + addi_d(AT, AT, con_size_in_bytes); + } + st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); +} + +static const double pi_4 = 0.7853981633974483; + +// must get argument(a double) in FA0/FA1 +//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { +//We need to preseve the register which maybe modified during the Call +void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { + // save all modified register here + // FIXME, in the disassembly of tirgfunc, only used V0, V1, T4, SP, RA, so we ony save V0, V1, T4 + guarantee(0, "LA not implemented yet"); +#if 0 + pushad(); + // we should preserve the stack space before we call + addi_d(SP, SP, -wordSize * 2); + switch (trig){ + case 's' : + call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); + break; + case 'c': + call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); + break; + case 't': + call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); + break; + default:assert (false, "bad intrinsic"); + break; + + } + + addi_d(SP, SP, wordSize * 2); + popad(); +#endif +} + +void MacroAssembler::li(Register rd, jlong value) { + jlong hi12 = bitfield(value, 52, 12); + jlong lo52 = bitfield(value, 0, 52); + + if (hi12 != 0 && lo52 == 0) { + lu52i_d(rd, R0, hi12); + } else { + jlong hi20 = bitfield(value, 32, 20); + jlong lo20 = bitfield(value, 12, 20); + jlong lo12 = bitfield(value, 0, 12); + + if (lo20 == 0) { + ori(rd, R0, lo12); + } else if (bitfield(simm12(lo12), 12, 20) == lo20) { + addi_w(rd, R0, simm12(lo12)); + } else { + lu12i_w(rd, lo20); + if (lo12 != 0) + ori(rd, rd, lo12); + } + if (hi20 != bitfield(simm20(lo20), 20, 20)) + lu32i_d(rd, hi20); + if (hi12 != bitfield(simm20(hi20), 20, 12)) + lu52i_d(rd, rd, hi12); + } +} + +void MacroAssembler::patchable_li52(Register rd, jlong value) { + int count = 0; + + if (value <= max_jint && value >= min_jint) { + if (is_simm(value, 12)) { + addi_d(rd, R0, value); + count++; + } else { + lu12i_w(rd, split_low20(value >> 12)); + count++; + if (split_low12(value)) { + ori(rd, rd, split_low12(value)); + count++; + } + } + } else if (is_simm(value, 52)) { + lu12i_w(rd, split_low20(value >> 12)); + count++; + if 
(split_low12(value)) { + ori(rd, rd, split_low12(value)); + count++; + } + lu32i_d(rd, split_low20(value >> 32)); + count++; + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 3) { + nop(); + count++; + } +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert(UseCompressedClassPointers, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + long narrowKlass = (long)Klass::encode_klass(k); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_li52(dst, narrowKlass); +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert(UseCompressedOops, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_li52(dst, oop_index); +} + +void MacroAssembler::lipc(Register rd, Label& L) { + if (L.is_bound()) { + jint offs = (target(L) - pc()) >> 2; + guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); + pcaddi(rd, offs); + } else { + InstructionMark im(this); + L.add_patch_at(code(), locator()); + pcaddi(rd, 0); + } +} + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) return; + const char * b = NULL; + stringStream ss; + ss.print("verify_oop: %s: %s", reg->name(), s); + b = code_string(ss.as_string()); + pushad(); + move(A1, reg); + patchable_li52(A0, (long)b); + li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); + ld_d(T4, AT, 0); + jalr(T4); + popad(); +} + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +#if 0 + if (!VerifyOops) { + nop(); + return; + } + // Pass register number to verify_oop_subroutine + const char * b = NULL; + stringStream ss; + ss.print("verify_oop_addr: %s", s); + b = code_string(ss.as_string()); + + st_ptr(T0, SP, - wordSize); + st_ptr(T1, SP, - 2*wordSize); + st_ptr(RA, SP, - 3*wordSize); + st_ptr(A0, SP, - 4*wordSize); + st_ptr(A1, SP, - 5*wordSize); + st_ptr(AT, SP, - 6*wordSize); + st_ptr(T9, SP, - 7*wordSize); + ld_ptr(A1, addr); // addr may use SP, so load from it before change SP + addiu(SP, SP, - 7 * wordSize); + + patchable_li52(A0, (long)b); + // call indirectly to solve generation ordering problem + li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); + ld_ptr(T9, AT, 0); + jalr(T9); + delayed()->nop(); + ld_ptr(T0, SP, 6* wordSize); + ld_ptr(T1, SP, 5* wordSize); + ld_ptr(RA, SP, 4* wordSize); + ld_ptr(A0, SP, 3* wordSize); + ld_ptr(A1, SP, 2* wordSize); + ld_ptr(AT, SP, 1* wordSize); + ld_ptr(T9, SP, 0* wordSize); + addiu(SP, SP, 7 * wordSize); +#endif +} + +// used registers : T0, T1 +void MacroAssembler::verify_oop_subroutine() { + // RA: ra + // A0: char* error message + // A1: oop object to verify + Label exit, error; + // increment counter + li(T0, (long)StubRoutines::verify_oop_count_addr()); + ld_w(AT, T0, 0); + addi_d(AT, AT, 1); + st_w(AT, T0, 0); + + // make sure object is 'reasonable' + beq(A1, R0, exit); // if obj is NULL it is ok + + // Check if the oop is in the right area of memory + // const int oop_mask = Universe::verify_oop_mask(); + // const int oop_bits = 
Universe::verify_oop_bits(); + const uintptr_t oop_mask = Universe::verify_oop_mask(); + const uintptr_t oop_bits = Universe::verify_oop_bits(); + li(AT, oop_mask); + andr(T0, A1, AT); + li(AT, oop_bits); + bne(T0, AT, error); + + // make sure klass is 'reasonable' + // add for compressedoops + reinit_heapbase(); + // add for compressedoops + load_klass(T0, A1); + beq(T0, R0, error); // if klass is NULL it is broken + // return if everything seems ok + bind(exit); + + jr(RA); + + // handle errors + bind(error); + pushad(); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + popad(); + jr(RA); +} + +void MacroAssembler::verify_tlab(Register t1, Register t2) { +#ifdef ASSERT + assert_different_registers(t1, t2, AT); + if (UseTLAB && VerifyOops) { + Label next, ok; + + get_thread(t1); + + ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); + bgeu(t2, AT, next); + + stop("assert(top >= start)"); + + bind(next); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); + bgeu(AT, t2, ok); + + stop("assert(top <= end)"); + + bind(ok); + + } +#endif +} + +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + //TODO: LA + guarantee(0, "LA not implemented yet"); + return RegisterOrConstant(tmp); +} + +void MacroAssembler::hswap(Register reg) { + // TODO LA opt + //short + srli_w(AT, reg, 8); + slli_w(reg, reg, 24); + srai_w(reg, reg, 16); + orr(reg, reg, AT); +} + +void MacroAssembler::huswap(Register reg) { + // TODO LA opt + srli_d(AT, reg, 8); + slli_d(reg, reg, 24); + srli_d(reg, reg, 16); + orr(reg, reg, AT); + bstrpick_d(reg, reg, 15, 0); +} + +// something funny to do this will only one more register AT +// 32 bits +void MacroAssembler::swap(Register reg) { + //TODO: LA opt + srli_w(AT, reg, 8); + slli_w(reg, reg, 24); + orr(reg, reg, AT); + //reg : 4 1 2 3 + srli_w(AT, AT, 16); + xorr(AT, AT, reg); + andi(AT, AT, 0xff); + //AT : 0 0 0 1^3); + xorr(reg, reg, AT); + //reg : 4 1 2 1 + slli_w(AT, AT, 16); + xorr(reg, reg, AT); + //reg : 4 3 2 1 +} + +void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, + Register resflag, bool retold, bool barrier) { + assert(oldval != resflag, "oldval != resflag"); + assert(newval != resflag, "newval != resflag"); + Label again, succ, fail; + + bind(again); + ll_d(resflag, addr); + bne(resflag, oldval, fail); + move(resflag, newval); + sc_d(resflag, addr); + beqz(resflag, again); + b(succ); + + bind(fail); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); + bind(succ); +} + +void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, + Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { + assert(oldval != tmp, "oldval != tmp"); + assert(newval != tmp, "newval != tmp"); + Label again, neq; + + bind(again); + ll_d(tmp, addr); + bne(tmp, oldval, neq); + move(tmp, newval); + sc_d(tmp, addr); + beqz(tmp, again); + b(succ); + + bind(neq); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) + b(*fail); +} + +void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, + Register resflag, bool sign, bool retold, bool barrier) { + assert(oldval != resflag, "oldval != resflag"); + assert(newval != resflag, "newval != resflag"); + Label again, succ, fail; + + bind(again); + ll_w(resflag, addr); + if (!sign) + lu32i_d(resflag, 0); + 
bne(resflag, oldval, fail); + move(resflag, newval); + sc_w(resflag, addr); + beqz(resflag, again); + b(succ); + + bind(fail); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); + bind(succ); +} + +void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, + bool sign, bool retold, bool barrier, Label& succ, Label* fail) { + assert(oldval != tmp, "oldval != tmp"); + assert(newval != tmp, "newval != tmp"); + Label again, neq; + + bind(again); + ll_w(tmp, addr); + if (!sign) + lu32i_d(tmp, 0); + bne(tmp, oldval, neq); + move(tmp, newval); + sc_w(tmp, addr); + beqz(tmp, again); + b(succ); + + bind(neq); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) + b(*fail); +} + +// be sure the three register is different +void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// be sure the three register is different +void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// Fast_Lock and Fast_Unlock used by C2 + +// Because the transitions from emitted code to the runtime +// monitorenter/exit helper stubs are so slow it's critical that +// we inline both the stack-locking fast-path and the inflated fast path. +// +// See also: cmpFastLock and cmpFastUnlock. +// +// What follows is a specialized inline transliteration of the code +// in slow_enter() and slow_exit(). If we're concerned about I$ bloat +// another option would be to emit TrySlowEnter and TrySlowExit methods +// at startup-time. These methods would accept arguments as +// (Obj, Self, box, Scratch) and return success-failure +// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply +// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. +// In practice, however, the # of lock sites is bounded and is usually small. +// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer +// if the processor uses simple bimodal branch predictors keyed by EIP +// Since the helper routines would be called from multiple synchronization +// sites. +// +// An even better approach would be write "MonitorEnter()" and "MonitorExit()" +// in java - using j.u.c and unsafe - and just bind the lock and unlock sites +// to those specialized methods. That'd give us a mostly platform-independent +// implementation that the JITs could optimize and inline at their pleasure. +// Done correctly, the only time we'd need to cross to native could would be +// to park() or unpark() threads. We'd also need a few more unsafe operators +// to (a) prevent compiler-JIT reordering of non-volatile accesses, and +// (b) explicit barriers or fence operations. +// +// TODO: +// +// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). +// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. +// Given TLAB allocation, Self is usually manifested in a register, so passing it into +// the lock operators would typically be faster than reifying Self. +// +// * Ideally I'd define the primitives as: +// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. +// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED +// Unfortunately ADLC bugs prevent us from expressing the ideal form. 
+// Instead, we're stuck with a rather awkward and brittle register assignments below. +// Furthermore the register assignments are overconstrained, possibly resulting in +// sub-optimal code near the synchronization site. +// +// * Eliminate the sp-proximity tests and just use "== Self" tests instead. +// Alternately, use a better sp-proximity test. +// +// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. +// Either one is sufficient to uniquely identify a thread. +// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. +// +// * Intrinsify notify() and notifyAll() for the common cases where the +// object is locked by the calling thread but the waitlist is empty. +// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). +// +// * use jccb and jmpb instead of jcc and jmp to improve code density. +// But beware of excessive branch density on AMD Opterons. +// +// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success +// or failure of the fast-path. If the fast-path fails then we pass +// control to the slow-path, typically in C. In Fast_Lock and +// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 +// will emit a conditional branch immediately after the node. +// So we have branches to branches and lots of ICC.ZF games. +// Instead, it might be better to have C2 pass a "FailureLabel" +// into Fast_Lock and Fast_Unlock. In the case of success, control +// will drop through the node. ICC.ZF is undefined at exit. +// In the case of failure, the node will branch directly to the +// FailureLabel + +// obj: object to lock +// box: on-stack box address (displaced header location) +// tmp: tmp -- KILLED +// scr: tmp -- KILLED +void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, + Register tmpReg, Register scrReg) { + Label IsInflated, DONE, DONE_SET; + + // Ensure the register assignents are disjoint + guarantee(objReg != boxReg, ""); + guarantee(objReg != tmpReg, ""); + guarantee(objReg != scrReg, ""); + guarantee(boxReg != tmpReg, ""); + guarantee(boxReg != scrReg, ""); + + block_comment("FastLock"); + + if (PrintBiasedLockingStatistics) { + atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); + } + + if (EmitSync & 1) { + move(AT, R0); + return; + } else + if (EmitSync & 2) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. 
+ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); + } + + ld_d(tmpReg, Address(objReg, 0)) ; // fetch markword + ori(tmpReg, tmpReg, 0x1); + st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg + + // Recursive locking + sub_d(tmpReg, tmpReg, SP); + li(AT, (7 - os::vm_page_size() )); + andr(tmpReg, tmpReg, AT); + st_d(tmpReg, Address(boxReg, 0)); + bind(DONE_LABEL) ; + } else { + // Possible cases that we'll encounter in fast_lock + // ------------------------------------------------ + // * Inflated + // -- unlocked + // -- Locked + // = by self + // = by other + // * biased + // -- by Self + // -- by other + // * neutral + // * stack-locked + // -- by self + // = sp-proximity test hits + // = sp-proximity test generates false-negative + // -- by other + // + + // TODO: optimize away redundant LDs of obj->mark and improve the markword triage + // order to reduce the number of conditional branches in the most common cases. + // Beware -- there's a subtle invariant that fetch of the markword + // at [FETCH], below, will never observe a biased encoding (*101b). + // If this invariant is not held we risk exclusion (safety) failure. + if (UseBiasedLocking && !UseOptoBiasInlining) { + Label succ, fail; + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); + b(fail); + bind(succ); + li(resReg, 1); + b(DONE); + bind(fail); + } + + ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. + andi(AT, tmpReg, markOopDesc::monitor_value); + bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias + + // Attempt stack-locking ... + ori(tmpReg, tmpReg, markOopDesc::unlocked_value); + st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + + if (PrintBiasedLockingStatistics) { + Label SUCC, FAIL; + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg + bind(SUCC); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); + li(resReg, 1); + b(DONE); + bind(FAIL); + } else { + // If cmpxchg is succ, then scrReg = 1 + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg + } + + // Recursive locking + // The object is stack-locked: markword contains stack pointer to BasicLock. + // Locked by current thread if difference with current SP is less than one page. + sub_d(tmpReg, tmpReg, SP); + li(AT, 7 - os::vm_page_size()); + andr(tmpReg, tmpReg, AT); + st_d(tmpReg, Address(boxReg, 0)); + + if (PrintBiasedLockingStatistics) { + Label L; + // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ + bnez(tmpReg, L); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); + bind(L); + } + + sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 + b(DONE); + + bind(IsInflated); + // The object's monitor m is unlocked iff m->owner == NULL, + // otherwise m->owner may contain a thread or a stack address. + + // TODO: someday avoid the ST-before-CAS penalty by + // relocating (deferring) the following ST. + // We should also think about trying a CAS without having + // fetched _owner. If the CAS is successful we may + // avoid an RTO->RTS upgrade on the $line. 
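For reference, the recursive stack-lock test above (sub_d from SP, then andr with 7 - os::vm_page_size()) reduces to the predicate sketched below; page_size is a plain parameter here rather than the real os::vm_page_size().

#include <cstdint>

// True when the displaced markword already points into the current thread's
// stack close to SP, i.e. the lock is held recursively by this thread.
static bool recursively_locked(uintptr_t mark, uintptr_t sp, uintptr_t page_size) {
  // 7 - page_size == (~(page_size - 1) | 7) for a power-of-two page size
  uintptr_t mask = ~(page_size - 1) | 7;
  return ((mark - sp) & mask) == 0;   // within one page above SP and 8-byte aligned
}

int main() {
  return recursively_locked(0x7ffff000 + 0x40, 0x7ffff000, 4096) ? 0 : 1;
}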
+ // Without cast to int32_t a movptr will destroy r10 which is typically obj + li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); + st_d(AT, Address(boxReg, 0)); + + ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + // if (m->owner != 0) => AT = 0, goto slow path. + move(scrReg, R0); + bnez(AT, DONE_SET); + +#ifndef OPT_THREAD + get_thread(TREG) ; +#endif + // It's inflated and appears unlocked + addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); + cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); + // Intentional fall-through into DONE ... + + bind(DONE_SET); + move(resReg, scrReg); + + // DONE is a hot target - we'd really like to place it at the + // start of cache line by padding with NOPs. + // See the AMD and Intel software optimization manuals for the + // most efficient "long" NOP encodings. + // Unfortunately none of our alignment mechanisms suffice. + bind(DONE); + // At DONE the resReg is set as follows ... + // Fast_Unlock uses the same protocol. + // resReg == 1 -> Success + // resREg == 0 -> Failure - force control through the slow-path + + // Avoid branch-to-branch on AMD processors + // This appears to be superstition. + if (EmitSync & 32) nop() ; + + } +} + +// obj: object to unlock +// box: box address (displaced header location), killed. +// tmp: killed tmp; cannot be obj nor box. +// +// Some commentary on balanced locking: +// +// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. +// Methods that don't have provably balanced locking are forced to run in the +// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. +// The interpreter provides two properties: +// I1: At return-time the interpreter automatically and quietly unlocks any +// objects acquired the current activation (frame). Recall that the +// interpreter maintains an on-stack list of locks currently held by +// a frame. +// I2: If a method attempts to unlock an object that is not held by the +// the frame the interpreter throws IMSX. +// +// Lets say A(), which has provably balanced locking, acquires O and then calls B(). +// B() doesn't have provably balanced locking so it runs in the interpreter. +// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O +// is still locked by A(). +// +// The only other source of unbalanced locking would be JNI. The "Java Native Interface: +// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter +// should not be unlocked by "normal" java-level locking and vice-versa. The specification +// doesn't specify what will occur if a program engages in such mixed-mode locking, however. + +void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, + Register tmpReg, Register scrReg) { + Label DONE, DONE_SET, Stacked, Inflated; + + guarantee(objReg != boxReg, ""); + guarantee(objReg != tmpReg, ""); + guarantee(objReg != scrReg, ""); + guarantee(boxReg != tmpReg, ""); + guarantee(boxReg != scrReg, ""); + + block_comment("FastUnlock"); + + if (EmitSync & 4) { + // Disable - inhibit all inlining. Force control through the slow-path + move(AT, R0); + return; + } else + if (EmitSync & 8) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + // classic stack-locking code ... 
+ ld_d(tmpReg, Address(boxReg, 0)) ; + assert_different_registers(AT, tmpReg); + li(AT, 0x1); + beq(tmpReg, R0, DONE_LABEL) ; + + cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); + bind(DONE_LABEL); + } else { + Label CheckSucc; + + // Critically, the biased locking test must have precedence over + // and appear before the (box->dhw == 0) recursive stack-lock test. + if (UseBiasedLocking && !UseOptoBiasInlining) { + Label succ, fail; + biased_locking_exit(objReg, tmpReg, succ); + b(fail); + bind(succ); + li(resReg, 1); + b(DONE); + bind(fail); + } + + ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header + sltui(AT, tmpReg, 1); + beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock + + ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword + andi(AT, tmpReg, markOopDesc::monitor_value); + beqz(AT, Stacked); // Inflated? + + bind(Inflated); + // It's inflated. + // Despite our balanced locking property we still check that m->_owner == Self + // as java routines or native JNI code called by this thread might + // have released the lock. + // Refer to the comments in synchronizer.cpp for how we might encode extra + // state in _succ so we can avoid fetching EntryList|cxq. + // + // I'd like to add more cases in fast_lock() and fast_unlock() -- + // such as recursive enter and exit -- but we have to be wary of + // I$ bloat, T$ effects and BP$ effects. + // + // If there's no contention try a 1-0 exit. That is, exit without + // a costly MEMBAR or CAS. See synchronizer.cpp for details on how + // we detect and recover from the race that the 1-0 exit admits. + // + // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier + // before it STs null into _owner, releasing the lock. Updates + // to data protected by the critical section must be visible before + // we drop the lock (and thus before any other thread could acquire + // the lock and observe the fields protected by the lock). 
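The 1-0 inflated exit that the following code emits can be summarized by this sketch; ObjectMonitor below only models the fields the fast path touches and is not the VM's real layout.

#include <atomic>

struct ObjectMonitor {          // reduced model of the fields used here
  std::atomic<void*> owner;
  long               recursions;  // intptr_t in the VM
  void*              cxq;
  void*              EntryList;
};

// Returns false when the runtime slow path must complete the unlock.
static bool fast_exit(ObjectMonitor* m, void* self) {
  if (m->owner.load(std::memory_order_relaxed) != self) return false;
  if (m->recursions != 0) return false;
  if (m->cxq != nullptr || m->EntryList != nullptr) return false;  // waiters exist
  m->owner.store(nullptr, std::memory_order_release);              // the 1-0 exit
  return true;
}

int main() {
  int self;
  ObjectMonitor m;
  m.owner.store(&self);
  m.recursions = 0;
  m.cxq = nullptr;
  m.EntryList = nullptr;
  return fast_exit(&m, &self) ? 0 : 1;
}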
+#ifndef OPT_THREAD + get_thread(TREG); +#endif + + // It's inflated + ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + xorr(scrReg, scrReg, TREG); + + ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); + orr(scrReg, scrReg, AT); + + move(AT, R0); + bnez(scrReg, DONE_SET); + + ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); + ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); + orr(scrReg, scrReg, AT); + + move(AT, R0); + bnez(scrReg, DONE_SET); + + membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); // release-store + st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + li(resReg, 1); + b(DONE); + + bind(Stacked); + ld_d(tmpReg, Address(boxReg, 0)); + cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); + + bind(DONE_SET); + move(resReg, AT); + + if (EmitSync & 65536) { + bind (CheckSucc); + } + + bind(DONE); + + // Avoid branch to branch on AMD processors + if (EmitSync & 32768) { nop() ; } + } +} + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + + +void MacroAssembler::verify_FPU(int stack_depth, const char* s) { + //Unimplemented(); +} + +Register caller_saved_registers[] = {T7, T5, T6, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; +Register caller_saved_registers_except_v0[] = {T7, T5, T6, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; + + //TODO: LA +//In LA, F0~23 are all caller-saved registers +FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; + +// We preserve all caller-saved register +void MacroAssembler::pushad(){ + int i; + // Fixed-point registers + int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + st_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } +}; + +void MacroAssembler::popad(){ + int i; + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) + { + fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + for (i = 0; i < len; i++) + { + ld_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); +}; + +// We preserve all caller-saved register except V0 +void MacroAssembler::pushad_except_v0() { + int i; + // Fixed-point registers + int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + st_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } +} + +void MacroAssembler::popad_except_v0() { + int i; + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / 
sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) { + fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + for (i = 0; i < len; i++) { + ld_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); +} + +void MacroAssembler::push2(Register reg1, Register reg2) { + addi_d(SP, SP, -16); + st_d(reg1, SP, 8); + st_d(reg2, SP, 0); +} + +void MacroAssembler::pop2(Register reg1, Register reg2) { + ld_d(reg1, SP, 8); + ld_d(reg2, SP, 0); + addi_d(SP, SP, 16); +} + +// for UseCompressedOops Option +void MacroAssembler::load_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else { + ld_d(dst, src, oopDesc::klass_offset_in_bytes()); + } +} + +void MacroAssembler::store_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + encode_klass_not_null(src); + st_w(src, dst, oopDesc::klass_offset_in_bytes()); + } else { + st_d(src, dst, oopDesc::klass_offset_in_bytes()); + } +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld_d(dst, Address(dst, Klass::prototype_header_offset())); +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedClassPointers) { + st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); + } +} + +void MacroAssembler::load_heap_oop(Register dst, Address src) { + if(UseCompressedOops){ + ld_wu(dst, src); + decode_heap_oop(dst); + } else { + ld_d(dst, src); + } +} + +void MacroAssembler::store_heap_oop(Address dst, Register src){ + if(UseCompressedOops){ + assert(!dst.uses(src), "not enough registers"); + encode_heap_oop(src); + st_w(src, dst); + } else { + st_d(src, dst); + } +} + +void MacroAssembler::store_heap_oop_null(Address dst){ + if(UseCompressedOops){ + st_w(R0, dst); + } else { + st_d(R0, dst); + } +} + +#ifdef ASSERT +void MacroAssembler::verify_heapbase(const char* msg) { + assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); +} +#endif + +// Algorithm must match oop.inline.hpp encode_heap_oop. 
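As a reference for the compressed-oop helpers that follow, the arithmetic is essentially the sketch below; base and shift stand in for Universe::narrow_oop_base() and Universe::narrow_oop_shift(), and keeping NULL at 0 is what the maskeqz instructions achieve in the generated code.

#include <cstdint>

static uint32_t encode_oop(uint64_t oop, uint64_t base, int shift) {
  if (oop == 0) return 0;                          // NULL stays NULL
  return (uint32_t)((oop - base) >> shift);
}

static uint64_t decode_oop(uint32_t narrow, uint64_t base, int shift) {
  if (narrow == 0) return 0;                       // 0 stays NULL
  return base + ((uint64_t)narrow << shift);       // alsl_d / slli_d + add_d
}

int main() {
  const uint64_t base = 0x100000000ULL;            // assumed heap base
  const int shift = 3;                             // LogMinObjAlignmentInBytes
  uint64_t oop = base + 0x12340;
  return decode_oop(encode_oop(oop, base, shift), base, shift) == oop ? 0 : 1;
}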
+void MacroAssembler::encode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(r, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } + return; + } + + sub_d(AT, r, S5_heapbase); + maskeqz(r, AT, r); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(src, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + srli_d(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) { + move(dst, src); + } + } + return; + } + + sub_d(AT, src, S5_heapbase); + maskeqz(dst, AT, src); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register r) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(r, R0, ok); + stop("null oop passed to encode_heap_oop_not_null"); + bind(ok); + } +#endif + verify_oop(r, "broken oop in encode_heap_oop_not_null"); + if (Universe::narrow_oop_base() != NULL) { + sub_d(r, r, S5_heapbase); + } + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(src, R0, ok); + stop("null oop passed to encode_heap_oop_not_null2"); + bind(ok); + } +#endif + verify_oop(src, "broken oop in encode_heap_oop_not_null2"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + srli_d(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) { + move(dst, src); + } + } + return; + } + sub_d(dst, src, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::decode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + } + return; + } + + move(AT, r); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + shl(r, LogMinObjAlignmentInBytes); + add_d(r, r, S5_heapbase); + } + 
} else { + add_d(r, r, S5_heapbase); + } + maskeqz(r, r, AT); + verify_oop(r, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + slli_d(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) { + move(dst, src); + } + } + return; + } + + Register cond; + if (dst == src) { + cond = AT; + move(cond, src); + } else { + cond = src; + } + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + slli_d(dst, src, LogMinObjAlignmentInBytes); + add_d(dst, dst, S5_heapbase); + } + } else { + add_d(dst, src, S5_heapbase); + } + maskeqz(dst, dst, cond); + verify_oop(dst, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + // Note: it will change flags + assert(UseCompressedOops, "should only be used for compressed headers"); + assert(Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (Universe::narrow_oop_base() != NULL) { + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + shl(r, LogMinObjAlignmentInBytes); + add_d(r, r, S5_heapbase); + } + } else { + shl(r, LogMinObjAlignmentInBytes); + } + } else { + assert(Universe::narrow_oop_base() == NULL, "sanity"); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + assert(UseCompressedOops, "should only be used for compressed headers"); + assert(Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+ if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (Universe::narrow_oop_base() != NULL) { + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + slli_d(dst, src, LogMinObjAlignmentInBytes); + add_d(dst, dst, S5_heapbase); + } + } else { + slli_d(dst, src, LogMinObjAlignmentInBytes); + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + if (dst != src) { + move(dst, src); + } + } +} + +void MacroAssembler::encode_klass_not_null(Register r) { + if (Universe::narrow_klass_base() != NULL) { + assert(r != AT, "Encoding a klass in AT"); + li(AT, (int64_t)Universe::narrow_klass_base()); + sub_d(r, r, AT); + } + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(r, LogKlassAlignmentInBytes); + } +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + if (dst == src) { + encode_klass_not_null(src); + } else { + if (Universe::narrow_klass_base() != NULL) { + li(dst, (int64_t)Universe::narrow_klass_base()); + sub_d(dst, src, dst); + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(dst, LogKlassAlignmentInBytes); + } + } else { + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + srli_d(dst, src, LogKlassAlignmentInBytes); + } else { + move(dst, src); + } + } + } +} + +// Function instr_size_for_decode_klass_not_null() counts the instructions +// generated by decode_klass_not_null(register r) and reinit_heapbase(), +// when (Universe::heap() != NULL). Hence, if the instructions they +// generate change, then this method needs to be updated. +int MacroAssembler::instr_size_for_decode_klass_not_null() { + assert (UseCompressedClassPointers, "only for compressed klass ptrs"); + if (Universe::narrow_klass_base() != NULL) { + // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). + return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); + } else { + // longest load decode klass function, mov64, leaq + return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1); + } +} + +void MacroAssembler::decode_klass_not_null(Register r) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + assert(r != AT, "Decoding a klass in AT"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shl(r, LogKlassAlignmentInBytes); + } + if (Universe::narrow_klass_base() != NULL) { + li(AT, (int64_t)Universe::narrow_klass_base()); + add_d(r, r, AT); + } +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + if (dst == src) { + decode_klass_not_null(dst); + } else { + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+ li(dst, (int64_t)Universe::narrow_klass_base()); + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + alsl_d(dst, src, dst, Address::times_8 - 1); + } else { + add_d(dst, src, dst); + } + } +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops || UseCompressedClassPointers) { + if (Universe::heap() != NULL) { + if (Universe::narrow_oop_base() == NULL) { + move(S5_heapbase, R0); + } else { + li(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); + } + } else { + li(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); + ld_d(S5_heapbase, S5_heapbase, 0); + } + } +} + +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { +//implement ind gen_subtype_check + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + +SkipIfEqual::SkipIfEqual( + MacroAssembler* masm, const bool* flag_addr, bool value) { + _masm = masm; + _masm->li(AT, (address)flag_addr); + _masm->ld_b(AT, AT, 0); + _masm->addi_d(AT, AT, -value); + _masm->beq(AT, R0, _label); +} + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + beq(sub_klass, super_klass, *L_success); + // Check the supertype display: + if (must_load_sco) { + ld_wu(temp_reg, super_klass, sco_offset); + super_check_offset = RegisterOrConstant(temp_reg); + } + add_d(AT, sub_klass, super_check_offset.register_or_noreg()); + ld_d(AT, AT, super_check_offset.constant_or_zero()); + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) 
+ // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + beq(super_klass, AT, *L_success); + addi_d(AT, super_check_offset.as_register(), -sc_offset); + if (L_failure == &L_fallthrough) { + beq(AT, R0, *L_slow_path); + } else { + bne_far(AT, R0, *L_failure); + b(*L_slow_path); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + beq(super_klass, AT, *L_success); + } else { + bne(super_klass, AT, *L_slow_path); + b(*L_success); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + beq(super_klass, AT, *L_success); + } else { + bne_far(super_klass, AT, *L_failure); + b(*L_success); + } + } + + bind(L_fallthrough); +} + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + if (temp2_reg == noreg) + temp2_reg = TSR; + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); + Address super_cache_addr( sub_klass, sc_offset); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + // The repne_scan instruction uses fixed registers, which we must spill. + // Don't worry too much about pre-existing connections with the input regs. + +#ifndef PRODUCT + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; + ExternalAddress pst_counter_addr((address) pst_counter); +#endif //PRODUCT + + // We will consult the secondary-super array. + ld_d(temp_reg, secondary_supers_addr); + // Load the array length. + ld_w(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); + // Skip to start of data. + addi_d(temp_reg, temp_reg, Array::base_offset_in_bytes()); + + Label Loop, subtype; + bind(Loop); + beq(temp2_reg, R0, *L_failure); + ld_d(AT, temp_reg, 0); + addi_d(temp_reg, temp_reg, 1 * wordSize); + beq(AT, super_klass, subtype); + addi_d(temp2_reg, temp2_reg, -1); + b(Loop); + + bind(subtype); + st_d(super_klass, super_cache_addr); + if (L_success != &L_fallthrough) { + b(*L_success); + } + + // Success. Cache the super we found and proceed in triumph. 
+#undef IS_A_TEMP
+
+  bind(L_fallthrough);
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
+  ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
+  st_d(R0, Address(java_thread, JavaThread::vm_result_offset()));
+  verify_oop(oop_result, "broken oop in call_VM_base");
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
+  ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
+  st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
+}
+
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+                                         int extra_slot_offset) {
+  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+  int stackElementSize = Interpreter::stackElementSize;
+  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
+#ifdef ASSERT
+  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
+  assert(offset1 - offset == stackElementSize, "correct arithmetic");
+#endif
+  Register scale_reg = NOREG;
+  Address::ScaleFactor scale_factor = Address::no_scale;
+  if (arg_slot.is_constant()) {
+    offset += arg_slot.as_constant() * stackElementSize;
+  } else {
+    scale_reg    = arg_slot.as_register();
+    scale_factor = Address::times_8;
+  }
+  // We don't push RA on stack in prepare_invoke.
+  // offset += wordSize; // return PC is on stack
+  if (scale_reg == NOREG) {
+    return Address(SP, offset);
+  } else {
+    alsl_d(scale_reg, scale_reg, SP, scale_factor - 1);
+    return Address(scale_reg, offset);
+  }
+}
+
+SkipIfEqual::~SkipIfEqual() {
+  _masm->bind(_label);
+}
+
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
+  switch (size_in_bytes) {
+  case 8: ld_d(dst, src); break;
+  case 4: ld_w(dst, src); break;
+  case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break;
+  case 1: is_signed ? ld_b(dst, src) : ld_bu(dst, src); break;
+  default: ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
+  switch (size_in_bytes) {
+  case 8: st_d(src, dst); break;
+  case 4: st_w(src, dst); break;
+  case 2: st_h(src, dst); break;
+  case 1: st_b(src, dst); break;
+  default: ShouldNotReachHere();
+  }
+}
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
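+// The itable is scanned linearly starting just past the vtable; scan_temp walks
+// itableOffsetEntry records until intf_klass is found or a null entry ends the search.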
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Label& L_no_such_interface,
+                                             bool return_method) {
+  assert_different_registers(recv_klass, intf_klass, scan_temp, AT);
+  assert_different_registers(method_result, intf_klass, scan_temp, AT);
+  assert(recv_klass != method_result || !return_method,
+         "recv_klass can be destroyed when method isn't needed");
+
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int scan_step   = itableOffsetEntry::size() * wordSize;
+  int vte_size    = vtableEntry::size() * wordSize;
+  Address::ScaleFactor times_vte_scale = Address::times_ptr;
+  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+  ld_w(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
+
+  // %%% Could store the aligned, prescaled offset in the klassoop.
+  alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1);
+  addi_d(scan_temp, scan_temp, vtable_base);
+  if (HeapWordsPerLong > 1) {
+    // Round up to align_object_offset boundary
+    // see code for InstanceKlass::start_of_itable!
+    round_to(scan_temp, BytesPerLong);
+  }
+
+  if (return_method) {
+    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+    if (itable_index.is_constant()) {
+      li(AT, (int)itable_index.as_constant());
+      alsl_d(AT, AT, recv_klass, (int)Address::times_ptr - 1);
+    } else {
+      alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1);
+    }
+    addi_d(recv_klass, AT, itentry_off);
+  }
+
+  Label search, found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {
+    ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
+
+    if (peel) {
+      beq(intf_klass, method_result, found_method);
+    } else {
+      bne(intf_klass, method_result, search);
+      // (invert the test to fall through to found_method...)
+    }
+
+    if (!peel) break;
+
+    bind(search);
+
+    // Check that the previous entry is non-null. A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    beq(method_result, R0, L_no_such_interface);
+    addi_d(scan_temp, scan_temp, scan_step);
+  }
+
+  bind(found_method);
+
+  if (return_method) {
+    // Got a hit.
+ ld_w(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + ldx_d(method_result, recv_klass, scan_temp); + } +} + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + Register tmp = S8; + push(tmp); + + if (vtable_index.is_constant()) { + assert_different_registers(recv_klass, method_result, tmp); + } else { + assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); + } + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); + if (vtable_index.is_constant()) { + li(AT, vtable_index.as_constant()); + slli_d(AT, AT, (int)Address::times_ptr); + } else { + slli_d(AT, vtable_index.as_register(), (int)Address::times_ptr); + } + li(tmp, base + vtableEntry::method_offset_in_bytes()); + add_d(tmp, tmp, AT); + add_d(tmp, tmp, recv_klass); + ld_d(method_result, tmp, 0); + + pop(tmp); +} + +void MacroAssembler::load_byte_map_base(Register reg) { + jbyte *byte_map_base = + ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base; + + // Strictly speaking the byte_map_base isn't an address at all, and it might + // even be negative. It is thus materialised as a constant. + li(reg, (uint64_t)byte_map_base); +} + +void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { + const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); + STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code + // The inverted mask is sign-extended + li(AT, inverted_jweak_mask); + andr(possibly_jweak, AT, possibly_jweak); +} + +void MacroAssembler::resolve_jobject(Register value, + Register thread, + Register tmp) { + assert_different_registers(value, thread, tmp); + Label done, not_weak; + beq(value, R0, done); // Use NULL as-is. + li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. + andr(AT, value, AT); + beq(AT, R0, not_weak); + // Resolve jweak. + ld_d(value, value, -JNIHandles::weak_tag_value); + verify_oop(value); + #if INCLUDE_ALL_GCS + if (UseG1GC) { + g1_write_barrier_pre(noreg /* obj */, + value /* pre_val */, + thread /* thread */, + tmp /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } + #endif // INCLUDE_ALL_GCS + b(done); + bind(not_weak); + // Resolve (untagged) jobject. 
+ ld_d(value, value, 0); + verify_oop(value); + bind(done); +} + +void MacroAssembler::lea(Register rd, Address src) { + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index == noreg) { + if (is_simm(disp, 12)) { + addi_d(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + add_d(dst, base, AT); + } + } else { + if (scale == 0) { + if (is_simm(disp, 12)) { + add_d(AT, base, index); + addi_d(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + add_d(AT, base, AT); + add_d(dst, AT, index); + } + } else { + if (is_simm(disp, 12)) { + alsl_d(AT, index, base, scale - 1); + addi_d(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + add_d(AT, AT, base); + alsl_d(dst, index, AT, scale - 1); + } + } + } +} + +void MacroAssembler::lea(Register dst, AddressLiteral adr) { + code_section()->relocate(pc(), adr.rspec()); + pcaddi(dst, (adr.target() - pc()) >> 2); +} + +int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { + int v = (dest_pos - inst_pos) >> 2; + switch(high(inst, 6)) { + case beq_op: + case bne_op: + case blt_op: + case bge_op: + case bltu_op: + case bgeu_op: + assert(is_simm16(v), "must be simm16"); +#ifndef PRODUCT + if(!is_simm16(v)) + { + tty->print_cr("must be simm16"); + tty->print_cr("Inst: %x", inst); + } +#endif + + inst &= 0xfc0003ff; + inst |= ((v & 0xffff) << 10); + break; + case beqz_op: + case bnez_op: + case bccondz_op: + assert(is_simm(v, 21), "must be simm21"); +#ifndef PRODUCT + if(!is_simm(v, 21)) + { + tty->print_cr("must be simm21"); + tty->print_cr("Inst: %x", inst); + } +#endif + + inst &= 0xfc0003e0; + inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); + break; + case b_op: + case bl_op: + assert(is_simm(v, 26), "must be simm26"); +#ifndef PRODUCT + if(!is_simm(v, 26)) + { + tty->print_cr("must be simm26"); + tty->print_cr("Inst: %x", inst); + } +#endif + + inst &= 0xfc000000; + inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); + break; + default: + ShouldNotReachHere(); + break; + } + return inst; +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp, + bool is_signed) { + switch (cmp) { + case EQ: + sub_d(AT, op1, op2); + maskeqz(dst, dst, AT); + masknez(AT, src, AT); + break; + + case NE: + sub_d(AT, op1, op2); + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + break; + + case GT: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + break; + + case GE: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + maskeqz(dst, dst, AT); + masknez(AT, src, AT); + break; + + case LT: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + break; + + case LE: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + maskeqz(dst, dst, AT); + masknez(AT, src, AT); + break; + + default: + Unimplemented(); + } + OR(dst, dst, AT); +} + + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp, + bool is_float) { + movgr2fr_d(tmp1, dst); + movgr2fr_d(tmp2, src); + + switch(cmp) 
{ + case EQ: + if (is_float) { + fcmp_ceq_s(FCC0, op1, op2); + } else { + fcmp_ceq_d(FCC0, op1, op2); + } + fsel(tmp1, tmp1, tmp2, FCC0); + break; + + case NE: + if (is_float) { + fcmp_ceq_s(FCC0, op1, op2); + } else { + fcmp_ceq_d(FCC0, op1, op2); + } + fsel(tmp1, tmp2, tmp1, FCC0); + break; + + case GT: + if (is_float) { + fcmp_cule_s(FCC0, op1, op2); + } else { + fcmp_cule_d(FCC0, op1, op2); + } + fsel(tmp1, tmp2, tmp1, FCC0); + break; + + case GE: + if (is_float) { + fcmp_cult_s(FCC0, op1, op2); + } else { + fcmp_cult_d(FCC0, op1, op2); + } + fsel(tmp1, tmp2, tmp1, FCC0); + break; + + case LT: + if (is_float) { + fcmp_cult_s(FCC0, op1, op2); + } else { + fcmp_cult_d(FCC0, op1, op2); + } + fsel(tmp1, tmp1, tmp2, FCC0); + break; + + case LE: + if (is_float) { + fcmp_cule_s(FCC0, op1, op2); + } else { + fcmp_cule_d(FCC0, op1, op2); + } + fsel(tmp1, tmp1, tmp2, FCC0); + break; + + default: + Unimplemented(); + } + + movfr2gr_d(dst, tmp1); +} + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp, + bool is_float) { + switch(cmp) { + case EQ: + if (!is_float) { + fcmp_ceq_d(FCC0, op1, op2); + } else { + fcmp_ceq_s(FCC0, op1, op2); + } + fsel(dst, dst, src, FCC0); + break; + + case NE: + if (!is_float) { + fcmp_ceq_d(FCC0, op1, op2); + } else { + fcmp_ceq_s(FCC0, op1, op2); + } + fsel(dst, src, dst, FCC0); + break; + + case GT: + if (!is_float) { + fcmp_cule_d(FCC0, op1, op2); + } else { + fcmp_cule_s(FCC0, op1, op2); + } + fsel(dst, src, dst, FCC0); + break; + + case GE: + if (!is_float) { + fcmp_cult_d(FCC0, op1, op2); + } else { + fcmp_cult_s(FCC0, op1, op2); + } + fsel(dst, src, dst, FCC0); + break; + + case LT: + if (!is_float) { + fcmp_cult_d(FCC0, op1, op2); + } else { + fcmp_cult_s(FCC0, op1, op2); + } + fsel(dst, dst, src, FCC0); + break; + + case LE: + if (!is_float) { + fcmp_cule_d(FCC0, op1, op2); + } else { + fcmp_cule_s(FCC0, op1, op2); + } + fsel(dst, dst, src, FCC0); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp) { + movgr2fr_w(tmp1, R0); + + switch (cmp) { + case EQ: + sub_d(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, dst, src, FCC0); + break; + + case NE: + sub_d(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, src, dst, FCC0); + break; + + case GT: + slt(AT, op2, op1); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, src, dst, FCC0); + break; + + case GE: + slt(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, dst, src, FCC0); + break; + + case LT: + slt(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, src, dst, FCC0); + break; + + case LE: + slt(AT, op2, op1); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, dst, src, FCC0); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { + switch (type) { + case STORE_BYTE: st_b (reg, base, disp); break; + case STORE_CHAR: + case STORE_SHORT: st_h (reg, base, disp); break; + case STORE_INT: st_w (reg, base, disp); break; + case STORE_LONG: st_d (reg, base, disp); break; + case LOAD_BYTE: ld_b (reg, base, disp); break; + case LOAD_U_BYTE: ld_bu(reg, base, disp); break; + case LOAD_SHORT: ld_h (reg, base, disp); break; + case LOAD_U_SHORT: 
ld_hu(reg, base, disp); break; + case LOAD_INT: ld_w (reg, base, disp); break; + case LOAD_U_INT: ld_wu(reg, base, disp); break; + case LOAD_LONG: ld_d (reg, base, disp); break; + case LOAD_LINKED_LONG: + ll_d(reg, base, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { + switch (type) { + case STORE_BYTE: stx_b (reg, base, disp); break; + case STORE_CHAR: + case STORE_SHORT: stx_h (reg, base, disp); break; + case STORE_INT: stx_w (reg, base, disp); break; + case STORE_LONG: stx_d (reg, base, disp); break; + case LOAD_BYTE: ldx_b (reg, base, disp); break; + case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; + case LOAD_SHORT: ldx_h (reg, base, disp); break; + case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; + case LOAD_INT: ldx_w (reg, base, disp); break; + case LOAD_U_INT: ldx_wu(reg, base, disp); break; + case LOAD_LONG: ldx_d (reg, base, disp); break; + case LOAD_LINKED_LONG: + add_d(AT, base, disp); + ll_d(reg, AT, 0); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { + switch (type) { + case STORE_FLOAT: fst_s(reg, base, disp); break; + case STORE_DOUBLE: fst_d(reg, base, disp); break; + case STORE_VECTORX: vst (reg, base, disp); break; + case STORE_VECTORY: xvst (reg, base, disp); break; + case LOAD_FLOAT: fld_s(reg, base, disp); break; + case LOAD_DOUBLE: fld_d(reg, base, disp); break; + case LOAD_VECTORX: vld (reg, base, disp); break; + case LOAD_VECTORY: xvld (reg, base, disp); break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { + switch (type) { + case STORE_FLOAT: fstx_s(reg, base, disp); break; + case STORE_DOUBLE: fstx_d(reg, base, disp); break; + case STORE_VECTORX: vstx (reg, base, disp); break; + case STORE_VECTORY: xvstx (reg, base, disp); break; + case LOAD_FLOAT: fldx_s(reg, base, disp); break; + case LOAD_DOUBLE: fldx_d(reg, base, disp); break; + case LOAD_VECTORX: vldx (reg, base, disp); break; + case LOAD_VECTORY: xvldx (reg, base, disp); break; + default: + ShouldNotReachHere(); + } +} + +/** + * Emits code to update CRC-32 with a byte value according to constants in table + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. 
+ * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); +**/ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + xorr(val, val, crc); + andi(val, val, 0xff); + ld_w(val, Address(table, val, Address::times_4, 0)); + srli_w(crc, crc, 8); + xorr(crc, val, crc); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param tmp scratch register +**/ +void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { + Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; + assert_different_registers(crc, buf, len, tmp); + + nor(crc, crc, R0); + + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by64_loop); + ld_d(tmp, buf, 0); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 8); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 16); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 24); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 32); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 40); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 48); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 56); + crc_w_d_w(crc, tmp, crc); + addi_d(buf, buf, 64); + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by4_loop); + ld_w(tmp, buf, 0); + crc_w_w_w(crc, tmp, crc); + addi_d(buf, buf, 4); + addi_d(len, len, -4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + bge(R0, len, L_exit); + + bind(CRC_by1_loop); + ld_b(tmp, buf, 0); + crc_w_b_w(crc, tmp, crc); + addi_d(buf, buf, 1); + addi_d(len, len, -1); + blt(R0, len, CRC_by1_loop); + + bind(L_exit); + nor(crc, crc, R0); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param tmp scratch register +**/ +void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { + Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; + assert_different_registers(crc, buf, len, tmp); + + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by64_loop); + ld_d(tmp, buf, 0); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 8); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 16); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 24); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 32); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 40); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 48); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 56); + crcc_w_d_w(crc, tmp, crc); + addi_d(buf, buf, 64); + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by4_loop); + ld_w(tmp, buf, 0); + crcc_w_w_w(crc, tmp, crc); + addi_d(buf, buf, 4); + addi_d(len, len, -4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + bge(R0, len, L_exit); + + bind(CRC_by1_loop); + ld_b(tmp, buf, 0); + 
crcc_w_b_w(crc, tmp, crc); + addi_d(buf, buf, 1); + addi_d(len, len, -1); + blt(R0, len, CRC_by1_loop); + + bind(L_exit); +} diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp new file mode 100644 index 00000000000..8b123c2906e --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp @@ -0,0 +1,771 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP + +#include "asm/assembler.hpp" +#include "utilities/macros.hpp" +#include "runtime/rtmLocking.hpp" + + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +class MacroAssembler: public Assembler { + friend class LIR_Assembler; + friend class Runtime1; // as_Address() + + public: + // Compare code + typedef enum { + EQ = 0x01, + NE = 0x02, + GT = 0x03, + GE = 0x04, + LT = 0x05, + LE = 0x06 + } CMCompare; + + protected: + + Address as_Address(AddressLiteral adr); + Address as_Address(ArrayAddress adr); + + // Support for VM calls + // + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). +#ifdef CC_INTERP + // c++ interpreter never wants to use interp_masm version of call_VM + #define VIRTUAL +#else + #define VIRTUAL virtual +#endif + + VIRTUAL void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments // the number of arguments to pop after the call + ); + + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base + // returns the register which contains the thread upon return. If a thread register has been + // specified, the return value will correspond to that register. If no last_java_sp is specified + // (noreg) than sp will be used instead. 
+ VIRTUAL void call_VM_base( // returns the register containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call + bool check_exceptions // whether to check for pending exceptions after return + ); + + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); + + // helpers for FPU flag access + // tmp is a temporary register, if none is available use noreg + + public: + static intptr_t i[32]; + static float f[32]; + static void print(outputStream *s); + + static int i_offset(unsigned int k); + static int f_offset(unsigned int k); + + static void save_registers(MacroAssembler *masm); + static void restore_registers(MacroAssembler *masm); + + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + + void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. + void pd_patch_instruction(address branch, address target); + + address emit_trampoline_stub(int insts_call_instruction_offset, address target); + + // Support for inc/dec with optimal instruction selection depending on value + // void incrementl(Register reg, int value = 1); + // void decrementl(Register reg, int value = 1); + + + // Alignment + void align(int modulus); + + + // Stack frame creation/removal + void enter(); + void leave(); + + // Frame creation and destruction shared between JITs. + void build_frame(int framesize); + void remove_frame(int framesize); + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
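+  // The overloads below differ only in the number of register arguments and in
+  // whether an explicit last_java_sp is supplied; they all funnel into call_VM_base.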
+ + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, bool + check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result (Register oop_result, Register thread); + void get_vm_result_2(Register metadata_result, Register thread); + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2, Register arg_3); + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register thread, + Register last_java_sp, + Register last_java_fp, + Label& last_java_pc); + + // thread in the default location (S6) + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label& last_java_pc); + + void reset_last_Java_frame(Register thread, bool clear_fp); + + // thread in the default location (S6) + void reset_last_Java_frame(bool clear_fp); + + // Stores + void store_check(Register obj); // store check for obj - register is destroyed afterwards + void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) + + void resolve_jobject(Register value, Register thread, Register tmp); + void clear_jweak_tag(Register possibly_jweak); + +#if INCLUDE_ALL_GCS + + void g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + +#endif // INCLUDE_ALL_GCS + + // split store_check(Register obj) to enhance instruction interleaving + void store_check_part_1(Register obj); + void store_check_part_2(Register obj); + + // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 + void c2bool(Register x); + //add for compressedoops + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + void load_prototype_header(Register dst, Register src); + + void store_klass_gap(Register dst, Register src); + + void load_heap_oop(Register dst, Address src); + void store_heap_oop(Address dst, Register src); + void store_heap_oop_null(Address dst); + void encode_heap_oop(Register r); + void encode_heap_oop(Register dst, Register src); + void decode_heap_oop(Register r); + void decode_heap_oop(Register dst, Register src); + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop_not_null(Register dst, Register src); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register dst, Register src); + + // Returns the byte size of the instructions generated by decode_klass_not_null() + // when compressed klass pointers are being used. + static int instr_size_for_decode_klass_not_null(); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + DEBUG_ONLY(void verify_heapbase(const char* msg);) + + void set_narrow_klass(Register dst, Klass* k); + void set_narrow_oop(Register dst, jobject obj); + + // Sign extension + void sign_extend_short(Register reg) { ext_w_h(reg, reg); } + void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } + void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + + void trigfunc(char trig, int num_fpu_regs_in_use = 1); + // allocation + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void incr_allocated_bytes(Register thread, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. 
+ void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + + + // Debugging + + // only if +VerifyOops + void verify_oop(Register reg, const char* s = "broken oop"); + void verify_oop_addr(Address addr, const char * s = "broken oop addr"); + void verify_oop_subroutine(); + // TODO: verify method and klass metadata (compare against vptr?) + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + + #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) + #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyFPU + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + + // prints msg and continues + void warn(const char* msg); + + static void debug(char* msg/*, RegistersForDebugging* regs*/); + static void debug64(char* msg, int64_t pc, int64_t regs[]); + + void untested() { stop("untested"); } + + void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, sizeof(b), "unimplemented: %s", what); stop(b); } + + void should_not_reach_here() { stop("should not reach here"); } + + void print_CPU_state(); + + // Stack overflow checking + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + if (offset <= 2048) { + st_w(RA0, SP, -offset); + } else if (offset <= 32768 && !(offset & 3)) { + stptr_w(RA0, SP, -offset); + } else { + li(AT, offset); + sub_d(AT, SP, AT); + st_w(RA0, AT, 0); + } + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Also, clobbers tmp + void bang_stack_size(Register size, Register tmp); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp); + + //void verify_tlab(); + void verify_tlab(Register t1, Register t2); + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // tmp_reg is optional. If it is supplied (i.e., != noreg) it will + // be killed; if not supplied, push/pop will be used internally to + // allocate a temporary (inefficient, avoid if possible). 
+ // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. + int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); +#ifdef COMPILER2 + void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); + void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); +#endif + + void round_to(Register reg, int modulus) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +#if 0 + assert_different_registers(reg, AT); + increment(reg, modulus - 1); + move(AT, - modulus); + andr(reg, reg, AT); +#endif + } + + // the follow two might use AT register, be sure you have no meanful data in AT before you call them + void increment(Register reg, int imm); + void decrement(Register reg, int imm); + void increment(Address addr, int imm = 1); + void decrement(Address addr, int imm = 1); + void shl(Register reg, int sa) { slli_d(reg, reg, sa); } + void shr(Register reg, int sa) { srli_d(reg, reg, sa); } + void sar(Register reg, int sa) { srai_d(reg, reg, sa); } + // Helper functions for statistics gathering. + void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); + + // Calls + void call(address entry); + void call(address entry, relocInfo::relocType rtype); + void call(address entry, RelocationHolder& rh); + void call_long(address entry); + + address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); + + static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); + + static bool far_branches() { + if (ForceUnreachable) { + return true; + } else { + return ReservedCodeCacheSize > branch_range; + } + } + + // Emit the CompiledIC call idiom + address ic_call(address entry); + + // Jumps + void jmp(address entry); + void jmp(address entry, relocInfo::relocType rtype); + void jmp_far(Label& L); // patchable + + /* branches may exceed 16-bit offset */ + void b_far(address entry); + void b_far(Label& L); + + void bne_far (Register rs, Register rt, address entry); + void bne_far (Register rs, Register rt, Label& L); + + void beq_far (Register rs, Register rt, address entry); + void beq_far (Register rs, Register rt, Label& L); + + void blt_far (Register rs, Register rt, address entry, bool is_signed); + void blt_far (Register rs, Register rt, Label& L, bool is_signed); + + void bge_far (Register rs, Register rt, address entry, bool is_signed); + void bge_far (Register rs, Register rt, Label& L, bool is_signed); + + // For C2 to support long branches + void beq_long (Register rs, Register rt, Label& L); + void bne_long (Register rs, Register rt, Label& L); + void blt_long (Register rs, Register rt, Label& L, bool is_signed); + void bge_long (Register rs, Register rt, Label& L, bool is_signed); + void bc1t_long (Label& L); + void bc1f_long (Label& L); + + static bool patchable_branches() { + const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); + return ReservedCodeCacheSize > branch_range; + } + 
+ static bool reachable_from_branch_short(jlong offs); + + void patchable_jump_far(Register ra, jlong offs); + void patchable_jump(address target, bool force_patchable = false); + void patchable_call(address target, address call_size = 0); + + // Floating + // Data + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs + inline void ld_ptr(Register rt, Address a) { + ld_d(rt, a); + } + + inline void ld_ptr(Register rt, Register base, int offset16) { + ld_d(rt, base, offset16); + } + + // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs + inline void st_ptr(Register rt, Address a) { + st_d(rt, a); + } + + inline void st_ptr(Register rt, Register base, int offset16) { + st_d(rt, base, offset16); + } + + void ld_ptr(Register rt, Register base, Register offset); + void st_ptr(Register rt, Register base, Register offset); + + // ld_long will perform lw for 32 bit VMs and ld for 64 bit VMs + // st_long will perform sw for 32 bit VMs and sd for 64 bit VMs + inline void ld_long(Register rt, Register base, int offset16); + inline void st_long(Register rt, Register base, int offset16); + inline void ld_long(Register rt, Address a); + inline void st_long(Register rt, Address a); + void ld_long(Register rt, Register offset, Register base); + void st_long(Register rt, Register offset, Register base); + + // swap the two byte of the low 16-bit halfword + // this directive will use AT, be sure the high 16-bit of reg is zero + void hswap(Register reg); + void huswap(Register reg); + + // convert big endian integer to little endian integer + void swap(Register reg); + + void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, + bool retold, bool barrier); + void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, + bool retold, bool barrier, Label& succ, Label* fail = NULL); + void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, + bool sign, bool retold, bool barrier); + void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, + bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); + + void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");} + void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");} + void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); } + void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); } + void pop (Register reg) { ld_d (reg, SP, 0); addi_d(SP, SP, 8); } + void pop (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); } + void pop () { addi_d(SP, SP, 8); } + void pop2 () { addi_d(SP, SP, 16); } + void push2(Register reg1, Register reg2); + void pop2 (Register reg1, Register reg2); + //we need 2 fun to save and resotre general register + void pushad(); + void popad(); + void pushad_except_v0(); + void popad_except_v0(); + + void li(Register rd, jlong value); + void li(Register rd, address addr) { li(rd, (long)addr); } + void patchable_li52(Register rd, jlong value); + void lipc(Register rd, Label& L); + void move(Register rd, Register rs) { orr(rd, rs, R0); } + void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); } + void mov_metadata(Register dst, Metadata* obj); + 
void mov_metadata(Address dst, Metadata* obj); + + // Load the base of the cardtable byte map into reg. + void load_byte_map_base(Register reg); + + //FIXME + void empty_FPU_stack(){/*need implemented*/}; + + + // method handles (JSR 292) + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + + // LA added: + void jr (Register reg) { jirl(R0, reg, 0); } + void jalr(Register reg) { jirl(RA, reg, 0); } + void nop () { andi(R0, R0, 0); } + void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } + void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } + void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } + void lea (Register rd, Address src); + void lea (Register dst, AddressLiteral adr); + static int patched_branch(int dest_pos, int inst, int inst_pos); + + // Conditional move + void cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp = EQ, + bool is_signed = true); + void cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp = EQ, + bool is_float = true); + void cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp = EQ, + bool is_float = true); + void cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp = EQ); + + // CRC32 code for java.util.zip.CRC32::update() instrinsic. + void update_byte_crc32(Register crc, Register val, Register table); + + // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. + void kernel_crc32(Register crc, Register buf, Register len, Register tmp); + + // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. 
+ void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); + +#undef VIRTUAL + + public: +// Memory Data Type +#define INT_TYPE 0x100 +#define FLOAT_TYPE 0x200 +#define SIGNED_TYPE 0x10 +#define UNSIGNED_TYPE 0x20 + + typedef enum { + LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, + LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, + LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, + LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, + LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, + STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, + STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, + STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, + STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, + STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, + LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, + + LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, + LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, + LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, + + LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, + LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, + LOAD_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x3, + LOAD_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x4, + STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x5, + STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x6, + STORE_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x7, + STORE_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x8 + } CMLoadStoreDataType; + + void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { + assert((type & INT_TYPE), "must be General reg type"); + loadstore_t(reg, base, index, scale, disp, type); + } + + void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { + assert((type & FLOAT_TYPE), "must be Float reg type"); + loadstore_t(reg, base, index, scale, disp, type); + } + +private: + template + void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { + if (index != 0) { + assert(((scale==0)&&(disp==0)), "only support base+index"); + loadstore(reg, as_Register(base), as_Register(index), type); + } else { + loadstore(reg, as_Register(base), disp, type); + } + } + void loadstore(Register reg, Register base, int disp, int type); + void loadstore(Register reg, Register base, Register disp, int type); + void loadstore(FloatRegister reg, Register base, int disp, int type); + void loadstore(FloatRegister reg, Register base, Register disp, int type); +}; + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and it's + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. + */ +class SkipIfEqual { + private: + MacroAssembler* _masm; + Label _label; + + public: + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); + ~SkipIfEqual(); +}; + +#ifdef ASSERT +inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } +#endif + +struct tableswitch { + Register _reg; + int _insn_index; jint _first_key; jint _last_key; + Label _after; + Label _branches; +}; + +#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp new file mode 100644 index 00000000000..0b265a4defb --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2017, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp new file mode 100644 index 00000000000..b36216c5337 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "memory/metaspaceShared.hpp" + +// Generate the self-patching vtable method: +// +// This method will be called (as any other Klass virtual method) with +// the Klass itself as the first argument. Example: +// +// oop obj; +// int size = obj->klass()->klass_part()->oop_size(this); +// +// for which the virtual method call is Klass::oop_size(); +// +// The dummy method is called with the Klass object as the first +// operand, and an object as the second argument. 
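+// Each dummy method loads an encoded <vtable, method-index> pair into T5 and
+// branches to a common stub that patches the receiver's vtable pointer and
+// then jumps to the real method (see common_code below).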
+// + +//===================================================================== + +// All of the dummy methods in the vtable are essentially identical, +// differing only by an ordinal constant, and they bear no releationship +// to the original method which the caller intended. Also, there needs +// to be 'vtbl_list_size' instances of the vtable in order to +// differentiate between the 'vtable_list_size' original Klass objects. + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +void MetaspaceShared::generate_vtable_methods(void** vtbl_list, + void** vtable, + char** md_top, + char* md_end, + char** mc_top, + char* mc_end) { + intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); + *(intptr_t *)(*md_top) = vtable_bytes; + *md_top += sizeof(intptr_t); + void** dummy_vtable = (void**)*md_top; + *vtable = dummy_vtable; + *md_top += vtable_bytes; + + // Get ready to generate dummy methods. + + CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); + MacroAssembler* masm = new MacroAssembler(&cb); + Label common_code; + for (int i = 0; i < vtbl_list_size; ++i) { + for (int j = 0; j < num_virtuals; ++j) { + dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); + + // Load T5 with a value indicating vtable/offset pair. + // -- bits[ 7..0] (8 bits) which virtual method in table? + // -- bits[12..8] (5 bits) which virtual method table? + // -- must fit in 13-bit instruction immediate field. + __ li(T5, (i << 8) + j); + __ b(common_code); + } + } + + __ bind(common_code); + + __ srli_d(T4, T5, 8); // isolate vtable identifier. + __ shl(T4, LogBytesPerWord); + __ li(AT, (long)vtbl_list); + __ ldx_d(T4, AT, T4); // get correct vtable address. + __ st_d(T4, A0, 0); // update vtable pointer. + + __ andi(T5, T5, 0x00ff); // isolate vtable method index + __ shl(T5, LogBytesPerWord); + __ ldx_d(T4, T4, T5); // address of real method pointer. + __ jr(T4); // get real method pointer. + + __ flush(); + + *mc_top = (char*)__ pc(); +} diff --git a/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp new file mode 100644 index 00000000000..cb31ca5ad5b --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp @@ -0,0 +1,566 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) block_comment(error); __ stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + if (VerifyMethodHandles) + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, err_msg("%s should be nonzero", xname)); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //ASSERT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { + Label L; + BLOCK_COMMENT("verify_ref_kind {"); + __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); + __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); + __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); + __ andr(temp, temp, AT); + __ li(AT, ref_kind); + __ beq(temp, AT, L); + { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || + ref_kind == JVM_REF_invokeSpecial) + // could do this for all ref_kinds, but would explode assembly code size + trace_method_handle(_masm, buf); + __ STOP(buf); + } + BLOCK_COMMENT("} verify_ref_kind"); + __ bind(L); +} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { + assert(method == Rmethod, "interpreter calling convention"); + + Label L_no_such_method; + __ beq(method, R0, L_no_such_method); + + __ verify_method_ptr(method); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + Register rthread = TREG; + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? 
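That byte-sized test works because LoongArch is little-endian and interp_only_mode only ever holds small counts; a host-side sketch of the same trick that the ld_bu below relies on (names are illustrative):

#include <stdint.h>
#include <string.h>

static bool int_flag_nonzero_le(const int32_t* flag) {
  uint8_t low_byte;
  memcpy(&low_byte, flag, 1);   // byte 0 is the least-significant byte on little-endian
  return low_byte != 0;         // valid because the flag only takes small non-negative values
}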
+ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); + __ beq(AT, R0, run_compiled_code); + __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); + __ jr(T4); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ld_d(T4, method, in_bytes(entry_offset)); + __ jr(T4); + + __ bind(L_no_such_method); + address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); + __ jmp(wrong_method, relocInfo::runtime_call_type); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. + assert_different_registers(recv, method_temp, temp2); + assert(recv != noreg, "required register"); + assert(method_temp == Rmethod, "required register for loading method"); + + //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); + __ verify_oop(method_temp); + // the following assumes that a Method* is normally compressed in the vmtarget field: + __ ld_d(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ld_d(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, + Address(temp2, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + Address recv_addr = __ argument_address(temp2, -1); + __ ld_d(AT, recv_addr); + __ beq(recv, AT, L); + + recv_addr = __ argument_address(temp2, -1); + __ ld_d(V0, recv_addr); + __ STOP("receiver not on stack"); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. 
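The three dependent loads in jump_to_lambda_form() above amount to the pointer chain sketched here; the struct names are purely illustrative stand-ins for the java.lang.invoke objects, not HotSpot types:

struct Method_     { void* interpreted_entry; };   // stands in for Method*
struct MemberName_ { Method_* vmtarget; };
struct LambdaForm_ { MemberName_* vmentry; };
struct MH_         { LambdaForm_* form; };

static void* resolve_invoker(const MH_* mh) {
  return mh->form->vmentry->vmtarget->interpreted_entry;   // MH -> form -> vmentry -> vmtarget
}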
+ __ stop("empty stubs make SG sick"); + return NULL; + } + + // Rmethod: Method* + // T4: argument locator (parameter slot count, added to sp) + // S7: used as temp to hold mh or receiver + Register t4_argp = T4; // argument list ptr, live on error paths + Register s7_mh = S7; // MH receiver; dies quickly and is recycled + Register rm_method = Rmethod; // eventual target of this invocation + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ ld_bu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); + guarantee(Assembler::is_simm(iid, 12), "Oops, iid is not simm16! Change the instructions."); + __ addi_d(AT, AT, -1 * (int) iid); + __ beq(AT, R0, L); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ STOP("bad Method*::intrinsic_id"); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. + Address t4_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); + __ load_sized_value(t4_argp, + Address(t4_argp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + t4_first_arg_addr = __ argument_address(t4_argp, -1); + } else { + DEBUG_ONLY(t4_argp = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ld_d(s7_mh, t4_first_arg_addr); + DEBUG_ONLY(t4_argp = noreg); + } + + // t4_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register r_recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. + __ ld_d(r_recv = T2, t4_first_arg_addr); + } + DEBUG_ONLY(t4_argp = noreg); + Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now + __ pop(rm_member); // extract last argument + generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); + } + + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + Register rm_method = Rmethod; // eventual target of this invocation + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register j_rarg0 = T0; + Register j_rarg1 = A0; + Register j_rarg2 = A1; + Register j_rarg3 = A2; + Register j_rarg4 = A3; + Register j_rarg5 = A4; + + Register temp1 = T8; + Register temp2 = T4; + Register temp3 = T5; + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? 
noreg : j_rarg0), "only valid assignment"); + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + } + else { + assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP + } + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + + if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz); + load_klass_from_Class(_masm, temp2_defc); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); + // If we get here, the type check failed! 
+ __ STOP("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ ld_d(rm_method, member_vmtarget); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ ld_d(rm_method, member_vmtarget); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ ld_d(temp2_index, member_vmindex); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ blt(R0, temp2_index, L_index_ok); + __ STOP("no virtual index"); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. + + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf); + __ verify_klass_ptr(temp3_intf); + + Register rm_index = rm_method; + __ ld_d(rm_index, member_vmindex); + if (VerifyMethodHandles) { + Label L; + __ bge(rm_index, R0, L); + __ STOP("invalid vtable index for MH.invokeInterface"); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rm_index, rm_method, + temp2, + L_incompatible_class_change_error); + break; + } + + default: + fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + break; + } + + // Live at this point: + // rm_method + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that r_recv be shifted out. + __ verify_method_ptr(rm_method); + jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); + __ jmp(icce_entry, relocInfo::runtime_call_type); + } + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oop mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { + // called as a leaf from native code: do not block the JVM! 
+ bool has_mh = (strstr(adaptername, "/static") == NULL && + strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH + const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; + tty->print_cr("MH %s %s="PTR_FORMAT" sp="PTR_FORMAT, + adaptername, mh_reg_name, + p2i(mh), p2i(entry_sp)); + + if (Verbose) { + tty->print_cr("Registers:"); + const int saved_regs_count = RegisterImpl::number_of_registers; + for (int i = 0; i < saved_regs_count; i++) { + Register r = as_Register(i); + // The registers are stored in reverse order on the stack (by pusha). + tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); + if ((i + 1) % 4 == 0) { + tty->cr(); + } else { + tty->print(", "); + } + } + tty->cr(); + + { + // dumping last frame with frame::describe + + JavaThread* p = JavaThread::active(); + + ResourceMark rm; + PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here + FrameValues values; + + // Note: We want to allow trace_method_handle from any call site. + // While trace_method_handle creates a frame, it may be entered + // without a PC on the stack top (e.g. not just after a call). + // Walking that frame could lead to failures due to that invalid PC. + // => carefully detect that frame when doing the stack walking + + // Current C frame + frame cur_frame = os::current_frame(); + + // Robust search of trace_calling_frame (independant of inlining). + // Assumes saved_regs comes from a pusha in the trace_calling_frame. + assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); + frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); + while (trace_calling_frame.fp() < saved_regs) { + trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); + } + + // safely create a frame and call frame::describe + intptr_t *dump_sp = trace_calling_frame.sender_sp(); + intptr_t *dump_fp = trace_calling_frame.link(); + + bool walkable = has_mh; // whether the traced frame shoud be walkable + + if (walkable) { + // The previous definition of walkable may have to be refined + // if new call sites cause the next frame constructor to start + // failing. Alternatively, frame constructors could be + // modified to support the current or future non walkable + // frames (but this is more intrusive and is not considered as + // part of this RFE, which will instead use a simpler output). + frame dump_frame = frame(dump_sp, dump_fp); + dump_frame.describe(values, 1); + } else { + // Stack may not be walkable (invalid PC above FP): + // Add descriptions without building a Java frame to avoid issues + values.describe(-1, dump_fp, "fp for #1 "); + values.describe(-1, dump_sp, "sp for #1"); + } + values.describe(-1, entry_sp, "raw top of stack"); + + tty->print_cr("Stack layout:"); + values.print(p); + } + if (has_mh && mh->is_oop()) { + mh->print(); + if (java_lang_invoke_MethodHandle::is_instance(mh)) { + if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) + java_lang_invoke_MethodHandle::form(mh)->print(); + } + } + } +} + +// The stub wraps the arguments in a struct on the stack to avoid +// dealing with the different calling conventions for passing 6 +// arguments. 
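The dump loop above indexes saved_regs back-to-front because the save sequence pushes the registers in ascending order; restated as a hypothetical helper:

#include <stdint.h>

static intptr_t saved_value_of(const intptr_t* saved_regs, int n_regs, int reg_index) {
  return saved_regs[(n_regs - 1) - reg_index];   // register i sits (n_regs-1-i) slots in, as in the loop above
}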
+struct MethodHandleStubArguments { + const char* adaptername; + oopDesc* mh; + intptr_t* saved_regs; + intptr_t* entry_sp; +}; +void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { + trace_method_handle_stub(args->adaptername, + args->mh, + args->saved_regs, + args->entry_sp); +} + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { +} +#endif //PRODUCT diff --git a/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp new file mode 100644 index 00000000000..f84337424b9 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. + +// Adapters +enum /* platform_dependent_constants */ { + adapter_code_size = 32000 DEBUG_ONLY(+ 150000) +}; + +// Additional helper methods for MethodHandles code generation: +public: + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry); + + static Register saved_last_sp_register() { + // Should be in sharedRuntime, not here. + return R3; + } diff --git a/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp new file mode 100644 index 00000000000..639ac6cd3e8 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp @@ -0,0 +1,485 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "compiler/disassembler.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif + +#ifndef PRODUCT +#include "compiler/disassembler.hpp" +#endif + +#include + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +void NativeInstruction::wrote(int offset) { + ICache::invalidate_word(addr_at(offset)); +} + +void NativeInstruction::set_long_at(int offset, long i) { + address addr = addr_at(offset); + *(long*)addr = i; + ICache::invalidate_range(addr, 8); +} + +bool NativeInstruction::is_int_branch() { + int op = Assembler::high(insn_word(), 6); + return op == Assembler::beqz_op || op == Assembler::bnez_op || + op == Assembler::beq_op || op == Assembler::bne_op || + op == Assembler::blt_op || op == Assembler::bge_op || + op == Assembler::bltu_op || op == Assembler::bgeu_op; +} + +bool NativeInstruction::is_float_branch() { + return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; +} + +bool NativeCall::is_bl() const { + return Assembler::high(int_at(0), 6) == Assembler::bl_op; +} + +void NativeCall::verify() { + assert(is_bl(), "not a NativeCall"); +} + +address NativeCall::target_addr_for_bl(address orig_addr) const { + address addr = orig_addr ? orig_addr : addr_at(0); + + // bl + if (is_bl()) { + return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | + ((int_at(0) >> 10) & 0xffff)) << 2); + } + + fatal("not a NativeCall"); + return NULL; +} + +address NativeCall::destination() const { + address addr = (address)this; + address destination = target_addr_for_bl(); + // Do we use a trampoline stub for this call? + // Trampoline stubs are located behind the main code. + if (destination > addr) { + // Filter out recursive method invocation (call to verified/unverified entry point). + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. 
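is_int_branch(), is_bl() and target_addr_for_bl() above all pattern-match fixed fields of a 32-bit instruction word. A sketch of the assumed helper semantics and of the b/bl offset decoding; the Assembler::high/low behaviour is inferred from how they are used here, so treat it as an assumption:

#include <stdint.h>

static inline uint32_t insn_high(uint32_t insn, int n) { return insn >> (32 - n); }         // top n bits
static inline uint32_t insn_low (uint32_t insn, int n) { return insn & ((1u << n) - 1u); }  // bottom n bits (n < 32)

// Byte offset encoded by a LoongArch b/bl: insn[25:10] holds offs[15:0], insn[9:0] holds
// offs[25:16]; the target is pc + (sign_extend(offs26) << 2), as target_addr_for_bl() computes.
static inline intptr_t bl_byte_offset(uint32_t insn) {
  uint32_t offs26   = ((insn & 0x3ff) << 16) | ((insn >> 10) & 0xffff);
  int32_t  signed26 = (int32_t)(offs26 << 6) >> 6;   // sign-extend from bit 25
  return (intptr_t)signed26 << 2;                    // word offset -> byte offset
}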
+ assert(cb && cb->is_nmethod(), "sanity"); + nmethod *nm = (nmethod *)cb; + NativeInstruction* ni = nativeInstruction_at(destination); + if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { + // Yes we do, so get the destination from the trampoline stub. + const address trampoline_stub_addr = destination; + destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + } + return destination; +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. +// +// Used in the runtime linkage of calls; see class CompiledIC. +// +// Add parameter assert_lock to switch off assertion +// during code generation, where no patching lock is needed. +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || + (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + address addr_call = addr_at(0); + bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); + assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + + // Patch the call. + if (!reachable) { + address trampoline_stub_addr = get_trampoline(); + assert (trampoline_stub_addr != NULL, "we need a trampoline"); + guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); + + // Patch the constant in the call's trampoline stub. + NativeInstruction* ni = nativeInstruction_at(dest); + assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); + nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); + dest = trampoline_stub_addr; + } + set_destination(dest); +} + +address NativeCall::get_trampoline() { + address call_addr = addr_at(0); + + CodeBlob *code = CodeCache::find_blob(call_addr); + assert(code != NULL, "Could not find the containing code blob"); + + address bl_destination + = nativeCall_at(call_addr)->target_addr_for_bl(); + NativeInstruction* ni = nativeInstruction_at(bl_destination); + if (code->contains(bl_destination) && + ni->is_NativeCallTrampolineStub_at()) + return bl_destination; + + // If the codeBlob is not a nmethod, this is because we get here from the + // CodeBlob constructor, which is called within the nmethod constructor. + return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); +} + +void NativeCall::set_destination(address dest) { + address addr_call = addr_at(0); + CodeBuffer cb(addr_call, instruction_size); + MacroAssembler masm(&cb); + assert(is_call_at(addr_call), "unexpected call type"); + jlong offs = dest - addr_call; + masm.bl(offs >> 2); + ICache::invalidate_range(addr_call, instruction_size); +} + +void NativeCall::print() { + tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, + p2i(instruction_address()), p2i(destination())); +} + +// Inserts a native call instruction at a given pc +void NativeCall::insert(address code_pos, address entry) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// MT-safe patching of a call instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. 
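set_destination_mt_safe() above only patches the bl directly when the target fits its signed 26-bit word offset, roughly +/-128 MB; otherwise it routes through the trampoline whose distance the guarantee checks. A sketch of that range test (the real predicate is MacroAssembler::reachable_from_branch_short, whose definition is not shown in this hunk):

static inline bool bl_offset_reachable(long byte_offset) {
  const long limit = 1L << 27;   // 2^25 words in either direction, 4 bytes per word
  return byte_offset >= -limit && byte_offset < limit && (byte_offset & 3) == 0;
}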
+void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { + Unimplemented(); +} + +bool NativeFarCall::is_short() const { + return Assembler::high(int_at(0), 10) == Assembler::andi_op && + Assembler::low(int_at(0), 22) == 0 && + Assembler::high(int_at(4), 6) == Assembler::bl_op; +} + +bool NativeFarCall::is_far() const { + return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && + Assembler::high(int_at(4), 6) == Assembler::jirl_op && + Assembler::low(int_at(4), 5) == RA->encoding(); +} + +address NativeFarCall::destination(address orig_addr) const { + address addr = orig_addr ? orig_addr : addr_at(0); + + if (is_short()) { + // short + return addr + BytesPerInstWord + + (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | + ((int_at(4) >> 10) & 0xffff)) << 2); + } + + if (is_far()) { + // far + return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + + (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); + } + + fatal("not a NativeFarCall"); + return NULL; +} + +void NativeFarCall::set_destination(address dest) { + address addr_call = addr_at(0); + CodeBuffer cb(addr_call, instruction_size); + MacroAssembler masm(&cb); + assert(is_far_call_at(addr_call), "unexpected call type"); + masm.patchable_call(dest, addr_call); + ICache::invalidate_range(addr_call, instruction_size); +} + +void NativeFarCall::verify() { + assert(is_short() || is_far(), "not a NativeFarcall"); +} + +//------------------------------------------------------------------- + +bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 10) == Assembler::ori_op && + Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; +} + +bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +bool NativeMovConstReg::is_lu12iw_2nop() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 10) == Assembler::andi_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +bool NativeMovConstReg::is_lu12iw_ori_nop() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 10) == Assembler::ori_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +bool NativeMovConstReg::is_addid_2nop() const { + return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && + Assembler::high(int_at(4), 10) == Assembler::andi_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +void NativeMovConstReg::verify() { + assert(is_li52(), "not a mov reg, imm52"); +} + +void NativeMovConstReg::print() { + tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, + p2i(instruction_address()), data()); +} + +intptr_t NativeMovConstReg::data() const { + if (is_lu12iw_ori_lu32id()) { + return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), + (intptr_t)((int_at(0) >> 5) & 0xfffff), + (intptr_t)((int_at(8) >> 5) & 0xfffff)); + } + + if (is_lu12iw_lu32id_nop()) { + return Assembler::merge((intptr_t)0, + (intptr_t)((int_at(0) >> 5) & 0xfffff), + (intptr_t)((int_at(4) >> 5) & 0xfffff)); + } + + if (is_lu12iw_2nop()) { + return Assembler::merge((intptr_t)0, + (intptr_t)((int_at(0) >> 5) & 0xfffff)); + } + + if (is_lu12iw_ori_nop()) { + return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), + 
(intptr_t)((int_at(0) >> 5) & 0xfffff)); + } + + if (is_addid_2nop()) { + return Assembler::simm12((int_at(0) >> 10) & 0xfff); + } + +#ifndef PRODUCT + Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); +#endif + fatal("not a mov reg, imm52"); + return 0; // unreachable +} + +void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { + CodeBuffer cb(addr_at(0), instruction_size); + MacroAssembler masm(&cb); + masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); + ICache::invalidate_range(addr_at(0), instruction_size); + + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. + CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); + nmethod* nm = blob->as_nmethod_or_null(); + if (nm != NULL) { + o = o ? o : x; + RelocIterator iter(nm, instruction_address(), next_instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop* oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(o); + break; + } else if (iter.type() == relocInfo::metadata_type) { + Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)o; + break; + } + } + } +} + +//------------------------------------------------------------------- + +int NativeMovRegMem::offset() const{ + //TODO: LA + guarantee(0, "LA not implemented yet"); + return 0; // mute compiler +} + +void NativeMovRegMem::set_offset(int x) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +void NativeMovRegMem::verify() { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + + +void NativeMovRegMem::print() { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +bool NativeInstruction::is_sigill_zombie_not_entrant() { + return uint_at(0) == NativeIllegalInstruction::instruction_code; +} + +void NativeIllegalInstruction::insert(address code_pos) { + *(juint*)code_pos = instruction_code; + ICache::invalidate_range(code_pos, instruction_size); +} + +void NativeJump::verify() { + assert(is_short() || is_far(), "not a general jump instruction"); +} + +bool NativeJump::is_short() { + return Assembler::high(insn_word(), 6) == Assembler::b_op; +} + +bool NativeJump::is_far() { + return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && + Assembler::high(int_at(4), 6) == Assembler::jirl_op && + Assembler::low(int_at(4), 5) == R0->encoding(); +} + +address NativeJump::jump_destination(address orig_addr) { + address addr = orig_addr ? orig_addr : addr_at(0); + + // short + if (is_short()) { + return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | + ((int_at(0) >> 10) & 0xffff)) << 2); + } + + // far + if (is_far()) { + return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + + (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); + } + + fatal("not a jump"); + return NULL; +} + +void NativeJump::set_jump_destination(address dest) { + OrderAccess::fence(); + + CodeBuffer cb(addr_at(0), instruction_size); + MacroAssembler masm(&cb); + masm.patchable_jump(dest); + ICache::invalidate_range(addr_at(0), instruction_size); +} + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// MT-safe patching of a long jump instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. 
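NativeMovConstReg::data() above reassembles a 52-bit immediate from the ori/lu12i_w/lu32i_d fields via Assembler::merge. A sketch of the assumed bit split; the field placement and sign-extension are inferred from the li52 pattern, not taken verbatim from this patch:

#include <stdint.h>

static int64_t merge_li52(uint32_t ori_imm12, uint32_t lu12iw_imm20, uint32_t lu32id_imm20) {
  int64_t v = ((int64_t)lu32id_imm20 << 32)     // bits [51:32]
            | ((int64_t)lu12iw_imm20 << 12)     // bits [31:12]
            | (int64_t)ori_imm12;               // bits [11:0]
  if (v & (1LL << 51)) v |= ~((1LL << 52) - 1); // sign-extend from bit 51
  return v;
}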
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// Must ensure atomicity +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + jlong offs = dest - verified_entry; + + if (MacroAssembler::reachable_from_branch_short(offs)) { + CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.b(dest); + } else { + // We use an illegal instruction for marking a method as + // not_entrant or zombie + NativeIllegalInstruction::insert(verified_entry); + } + ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); +} + +bool NativeInstruction::is_dtrace_trap() { + //return (*(int32_t*)this & 0xff) == 0xcc; + Unimplemented(); + return false; +} + +bool NativeInstruction::is_safepoint_poll() { + // + // 390 li T2, 0x0000000000400000 #@loadConP + // 394 st_w [SP + #12], V1 # spill 9 + // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 + // + // 0x000000ffe5815130: lu12i_w t2, 0x400 + // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} + // ;*goto + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + + // Since there may be some spill instructions between the safePoint_poll and loadConP, + // we check the safepoint instruction like this. + return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && + Assembler::low(insn_word(), 5) == AT->encoding(); +} diff --git a/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp new file mode 100644 index 00000000000..493239923b5 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp @@ -0,0 +1,513 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP + +#include "asm/assembler.hpp" +#include "memory/allocation.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" +#include "utilities/top.hpp" + +// We have interfaces for the following instructions: +// - NativeInstruction +// - - NativeCall +// - - NativeMovConstReg +// - - NativeMovConstRegPatching +// - - NativeMovRegMem +// - - NativeMovRegMemPatching +// - - NativeIllegalOpCode +// - - NativeGeneralJump +// - - NativePushConst +// - - NativeTstRegMem + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. + +class NativeInstruction VALUE_OBJ_CLASS_SPEC { + friend class Relocation; + + public: + enum loongarch_specific_constants { + nop_instruction_code = 0, + nop_instruction_size = 4, + sync_instruction_code = 0xf + }; + + bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } + bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } + bool is_dtrace_trap(); + inline bool is_call(); + inline bool is_far_call(); + inline bool is_illegal(); + bool is_jump(); + bool is_safepoint_poll(); + + // LoongArch has no instruction to generate a illegal instrucion exception? + // But `break 11` is not illegal instruction for LoongArch. + static int illegal_instruction(); + + bool is_int_branch(); + bool is_float_branch(); + + inline bool is_NativeCallTrampolineStub_at(); + //We use an illegal instruction for marking a method as not_entrant or zombie. + bool is_sigill_zombie_not_entrant(); + + protected: + address addr_at(int offset) const { return address(this) + offset; } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(BytesPerInstWord); } + address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } + + s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } + u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } + + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + + intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } + + oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + int long_at(int offset) const { return *(jint*)addr_at(offset); } + + + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } + void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } + void set_long_at(int offset, long i); + + int insn_word() const { return long_at(0); } + + void wrote(int offset); + + public: + + // unit test stuff + static void test() {} // override for testing + + inline friend NativeInstruction* nativeInstruction_at(address address); +}; + +inline NativeInstruction* nativeInstruction_at(address address) { + NativeInstruction* inst = (NativeInstruction*)address; +#ifdef ASSERT + //inst->verify(); +#endif + return inst; +} + +inline NativeCall* nativeCall_at(address address); + +// The NativeCall is an abstraction for accessing/manipulating native call +// instructions (used to manipulate inline caches, primitive & dll 
calls, etc.). +class NativeCall: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 1 * BytesPerInstWord, + return_address_offset = 1 * BytesPerInstWord, + displacement_offset = 0 + }; + + // We have only bl. + bool is_bl() const; + + address instruction_address() const { return addr_at(instruction_offset); } + + address next_instruction_address() const { + return addr_at(return_address_offset); + } + + address return_address() const { + return next_instruction_address(); + } + + address target_addr_for_bl(address orig_addr = 0) const; + address destination() const; + void set_destination(address dest); + + void verify_alignment() {} + void verify(); + void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address address); + inline friend NativeCall* nativeCall_before(address return_address); + + static bool is_call_at(address instr) { + return nativeInstruction_at(instr)->is_call(); + } + + static bool is_call_before(address return_address) { + return is_call_at(return_address - return_address_offset); + } + + // MT-safe patching of a call instruction. + static void insert(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); + + // Similar to replace_mt_safe, but just changes the destination. The + // important thing is that free-running threads are able to execute + // this call instruction at all times. If the call is an immediate bl + // instruction we can simply rely on atomicity of 32-bit writes to + // make sure other threads will see no intermediate states. + + // We cannot rely on locks here, since the free-running threads must run at + // full speed. + // + // Used in the runtime linkage of calls; see class CompiledIC. + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); + +}; + +inline NativeCall* nativeCall_at(address address) { + NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +// The NativeFarCall is an abstraction for accessing/manipulating native +// call-anywhere instructions. +// Used to call native methods which may be loaded anywhere in the address +// space, possibly out of reach of a call instruction. +class NativeFarCall: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_size = 2 * BytesPerInstWord, + }; + + // We use MacroAssembler::patchable_call() for implementing a + // call-anywhere instruction. + bool is_short() const; + bool is_far() const; + + // Checks whether instr points at a NativeFarCall instruction. + static bool is_far_call_at(address address) { + return nativeInstruction_at(address)->is_far_call(); + } + + // Returns the NativeFarCall's destination. + address destination(address orig_addr = 0) const; + + // Sets the NativeFarCall's destination, not necessarily mt-safe. + // Used when relocating code. + void set_destination(address dest); + + void verify(); +}; + +// Instantiates a NativeFarCall object starting at the given instruction +// address and returns the NativeFarCall object. 
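The comment above about relying on "atomicity of 32-bit writes" boils down to republishing the single call word with one aligned store; a minimal sketch of that property (the VM path additionally invalidates the icache, as NativeCall::set_destination does):

#include <stdint.h>

static void patch_insn_word(volatile uint32_t* slot, uint32_t new_insn) {
  *slot = new_insn;   // one aligned 4-byte store: a racing thread sees old or new, never a torn mix
  // followed in the VM by ICache::invalidate_range((address)slot, 4)
}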
+inline NativeFarCall* nativeFarCall_at(address address) { + NativeFarCall* call = (NativeFarCall*)address; +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +// An interface for accessing/manipulating native set_oop imm, reg instructions +// (used to manipulate inlined data references, etc.). +class NativeMovConstReg: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 3 * BytesPerInstWord, + next_instruction_offset = 3 * BytesPerInstWord, + }; + + int insn_word() const { return long_at(instruction_offset); } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + intptr_t data() const; + void set_data(intptr_t x, intptr_t o = 0); + + bool is_li52() const { + return is_lu12iw_ori_lu32id() || + is_lu12iw_lu32id_nop() || + is_lu12iw_2nop() || + is_lu12iw_ori_nop() || + is_addid_2nop(); + } + bool is_lu12iw_ori_lu32id() const; + bool is_lu12iw_lu32id_nop() const; + bool is_lu12iw_2nop() const; + bool is_lu12iw_ori_nop() const; + bool is_addid_2nop() const; + void verify(); + void print(); + + // unit test stuff + static void test() {} + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address address); + inline friend NativeMovConstReg* nativeMovConstReg_before(address address); +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovConstRegPatching: public NativeMovConstReg { + private: + friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { + NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + +class NativeMovRegMem: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 4, + hiword_offset = 4, + ldst_offset = 12, + immediate_size = 4, + ldst_size = 16 + }; + + address instruction_address() const { return addr_at(instruction_offset); } + + int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } + + int offset() const; + + void set_offset(int x); + + void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } + + void verify(); + void print (); + + // unit test stuff + static void test() {} + + private: + inline friend NativeMovRegMem* nativeMovRegMem_at (address address); +}; + +inline NativeMovRegMem* nativeMovRegMem_at (address address) { + NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovRegMemPatching: public NativeMovRegMem { + private: + friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { + NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + + +// Handles all kinds of jump on Loongson. 
+// short: +// b offs26 +// nop +// +// far: +// pcaddu18i reg, si20 +// jirl r0, reg, si18 +// +class NativeJump: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 2 * BytesPerInstWord + }; + + bool is_short(); + bool is_far(); + + address instruction_address() const { return addr_at(instruction_offset); } + address jump_destination(address orig_addr = 0); + void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + // Insertion of native jump instruction + static void insert(address code_pos, address entry) { Unimplemented(); } + // MT-safe insertion of native jump at verified method entry + static void check_verified_entry_alignment(address entry, address verified_entry){} + static void patch_verified_entry(address entry, address verified_entry, address dest); + + void verify(); +}; + +inline NativeJump* nativeJump_at(address address) { + NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); + debug_only(jump->verify();) + return jump; +} + +class NativeGeneralJump: public NativeJump { + public: + // Creation + inline friend NativeGeneralJump* nativeGeneralJump_at(address address); + + // Insertion of native general jump instruction + static void insert_unconditional(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); +}; + +inline NativeGeneralJump* nativeGeneralJump_at(address address) { + NativeGeneralJump* jump = (NativeGeneralJump*)(address); + debug_only(jump->verify();) + return jump; +} + +class NativeIllegalInstruction: public NativeInstruction { +public: + enum loongarch_specific_constants { + instruction_code = 0xbadc0de0, // TODO: LA + // Temporary LoongArch reserved instruction + instruction_size = 4, + instruction_offset = 0, + next_instruction_offset = 4 + }; + + // Insert illegal opcode as specific address + static void insert(address code_pos); +}; + +inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } + +inline bool NativeInstruction::is_call() { + NativeCall *call = (NativeCall*)instruction_address(); + return call->is_bl(); +} + +inline bool NativeInstruction::is_far_call() { + NativeFarCall *call = (NativeFarCall*)instruction_address(); + + // short + if (call->is_short()) { + return true; + } + + // far + if (call->is_far()) { + return true; + } + + return false; +} + +inline bool NativeInstruction::is_jump() +{ + NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); + + // short + if (jump->is_short()) { + return true; + } + + // far + if (jump->is_far()) { + return true; + } + + return false; +} + +// Call trampoline stubs. +class NativeCallTrampolineStub : public NativeInstruction { + public: + + enum la_specific_constants { + instruction_size = 6 * 4, + instruction_offset = 0, + data_offset = 4 * 4, + next_instruction_offset = 6 * 4 + }; + + address destination() const { + return (address)ptr_at(data_offset); + } + + void set_destination(address new_destination) { + set_ptr_at(data_offset, (intptr_t)new_destination); + OrderAccess::fence(); + } +}; + +// Note: Other stubs must not begin with this pattern. 
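Putting the offsets above together with the pcaddi/ld_d/jirl pattern matched just below, the stub occupies 24 bytes with its 8-byte target at data_offset; a layout sketch in which the +12 padding slot is an assumption:

#include <stdint.h>

struct TrampolineStubLayoutSketch {
  uint32_t pcaddi;        // +0   materialize the address of 'destination'
  uint32_t ld_d;          // +4   load the real call target
  uint32_t jirl;          // +8   indirect jump, rd = r0 so no link
  uint32_t pad;           // +12  keeps 'destination' 8-byte aligned (assumed nop/padding)
  uint64_t destination;   // +16  == data_offset, patched by set_destination(), read by destination()
};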
+inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { + // pcaddi + // ld_d + // jirl + return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && + Assembler::high(int_at(4), 10) == Assembler::ld_d_op && + Assembler::high(int_at(8), 6) == Assembler::jirl_op && + Assembler::low(int_at(8), 5) == R0->encoding(); +} + +inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { + NativeInstruction* ni = nativeInstruction_at(addr); + assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); + return (NativeCallTrampolineStub*)addr; +} +#endif // CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp new file mode 100644 index 00000000000..5ff7555d2f0 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP + +// machine-dependent implemention for register maps + friend class frame; + + private: + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. + // Since there is none, we just return NULL. + // See registerMap_sparc.hpp for an example of grabbing registers + // from register save areas of a standard layout. + address pd_location(VMReg reg) const {return NULL;} + + // no PD state to clear or copy: + void pd_clear() {} + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp new file mode 100644 index 00000000000..c6424c321f1 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/register.hpp" +#include "register_loongarch.hpp" +#ifdef TARGET_ARCH_MODEL_loongarch_32 +# include "interp_masm_loongarch_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_loongarch_64 +# include "interp_masm_loongarch_64.hpp" +#endif + +REGISTER_DEFINITION(Register, noreg); +REGISTER_DEFINITION(Register, r0); +REGISTER_DEFINITION(Register, r1); +REGISTER_DEFINITION(Register, r2); +REGISTER_DEFINITION(Register, r3); +REGISTER_DEFINITION(Register, r4); +REGISTER_DEFINITION(Register, r5); +REGISTER_DEFINITION(Register, r6); +REGISTER_DEFINITION(Register, r7); +REGISTER_DEFINITION(Register, r8); +REGISTER_DEFINITION(Register, r9); +REGISTER_DEFINITION(Register, r10); +REGISTER_DEFINITION(Register, r11); +REGISTER_DEFINITION(Register, r12); +REGISTER_DEFINITION(Register, r13); +REGISTER_DEFINITION(Register, r14); +REGISTER_DEFINITION(Register, r15); +REGISTER_DEFINITION(Register, r16); +REGISTER_DEFINITION(Register, r17); +REGISTER_DEFINITION(Register, r18); +REGISTER_DEFINITION(Register, r19); +REGISTER_DEFINITION(Register, r20); +REGISTER_DEFINITION(Register, r21); +REGISTER_DEFINITION(Register, r22); +REGISTER_DEFINITION(Register, r23); +REGISTER_DEFINITION(Register, r24); +REGISTER_DEFINITION(Register, r25); +REGISTER_DEFINITION(Register, r26); +REGISTER_DEFINITION(Register, r27); +REGISTER_DEFINITION(Register, r28); +REGISTER_DEFINITION(Register, r29); +REGISTER_DEFINITION(Register, r30); +REGISTER_DEFINITION(Register, r31); + +REGISTER_DEFINITION(FloatRegister, fnoreg); +REGISTER_DEFINITION(FloatRegister, f0); +REGISTER_DEFINITION(FloatRegister, f1); +REGISTER_DEFINITION(FloatRegister, f2); +REGISTER_DEFINITION(FloatRegister, f3); +REGISTER_DEFINITION(FloatRegister, f4); +REGISTER_DEFINITION(FloatRegister, f5); +REGISTER_DEFINITION(FloatRegister, f6); +REGISTER_DEFINITION(FloatRegister, f7); +REGISTER_DEFINITION(FloatRegister, f8); +REGISTER_DEFINITION(FloatRegister, f9); +REGISTER_DEFINITION(FloatRegister, f10); +REGISTER_DEFINITION(FloatRegister, f11); +REGISTER_DEFINITION(FloatRegister, f12); +REGISTER_DEFINITION(FloatRegister, f13); +REGISTER_DEFINITION(FloatRegister, f14); +REGISTER_DEFINITION(FloatRegister, f15); +REGISTER_DEFINITION(FloatRegister, f16); +REGISTER_DEFINITION(FloatRegister, f17); +REGISTER_DEFINITION(FloatRegister, f18); +REGISTER_DEFINITION(FloatRegister, f19); +REGISTER_DEFINITION(FloatRegister, f20); +REGISTER_DEFINITION(FloatRegister, f21); +REGISTER_DEFINITION(FloatRegister, f22); +REGISTER_DEFINITION(FloatRegister, f23); +REGISTER_DEFINITION(FloatRegister, f24); +REGISTER_DEFINITION(FloatRegister, f25); +REGISTER_DEFINITION(FloatRegister, f26); +REGISTER_DEFINITION(FloatRegister, f27); +REGISTER_DEFINITION(FloatRegister, f28); +REGISTER_DEFINITION(FloatRegister, f29); 
+REGISTER_DEFINITION(FloatRegister, f30); +REGISTER_DEFINITION(FloatRegister, f31); diff --git a/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp new file mode 100644 index 00000000000..3104cd1cc5d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "register_loongarch.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + + 2 * FloatRegisterImpl::number_of_registers; + + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", + "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", + "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", + "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", + }; + return is_valid() ? names[encoding()] : "fnoreg"; +} + +const char* ConditionalFlagRegisterImpl::name() const { + const char* names[number_of_registers] = { + "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", + }; + return is_valid() ? names[encoding()] : "fccnoreg"; +} diff --git a/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp new file mode 100644 index 00000000000..37b39f9129f --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP + +#include "asm/register.hpp" +#include "vm_version_loongarch.hpp" + +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + + +// The implementation of integer registers for the LoongArch architecture +inline Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + max_slots_per_register = 2 + }; + + // derived registers, offsets, and addresses + Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend Register as_Register(int encoding); + + VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(),err_msg( "invalid register (%d)", (int)(intptr_t)this)); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; +}; + + +// The integer registers of the LoongArch architecture +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + +CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); +CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); +CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); +CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); +CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); +CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); +CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); +CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); +CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); +CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); +CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); +CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); +CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); +CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); +CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); +CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); +CONSTANT_REGISTER_DECLARATION(Register, r30, 
(30)); +CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define NOREG ((Register)(noreg_RegisterEnumValue)) + +#define R0 ((Register)(r0_RegisterEnumValue)) +#define R1 ((Register)(r1_RegisterEnumValue)) +#define R2 ((Register)(r2_RegisterEnumValue)) +#define R3 ((Register)(r3_RegisterEnumValue)) +#define R4 ((Register)(r4_RegisterEnumValue)) +#define R5 ((Register)(r5_RegisterEnumValue)) +#define R6 ((Register)(r6_RegisterEnumValue)) +#define R7 ((Register)(r7_RegisterEnumValue)) +#define R8 ((Register)(r8_RegisterEnumValue)) +#define R9 ((Register)(r9_RegisterEnumValue)) +#define R10 ((Register)(r10_RegisterEnumValue)) +#define R11 ((Register)(r11_RegisterEnumValue)) +#define R12 ((Register)(r12_RegisterEnumValue)) +#define R13 ((Register)(r13_RegisterEnumValue)) +#define R14 ((Register)(r14_RegisterEnumValue)) +#define R15 ((Register)(r15_RegisterEnumValue)) +#define R16 ((Register)(r16_RegisterEnumValue)) +#define R17 ((Register)(r17_RegisterEnumValue)) +#define R18 ((Register)(r18_RegisterEnumValue)) +#define R19 ((Register)(r19_RegisterEnumValue)) +#define R20 ((Register)(r20_RegisterEnumValue)) +#define R21 ((Register)(r21_RegisterEnumValue)) +#define R22 ((Register)(r22_RegisterEnumValue)) +#define R23 ((Register)(r23_RegisterEnumValue)) +#define R24 ((Register)(r24_RegisterEnumValue)) +#define R25 ((Register)(r25_RegisterEnumValue)) +#define R26 ((Register)(r26_RegisterEnumValue)) +#define R27 ((Register)(r27_RegisterEnumValue)) +#define R28 ((Register)(r28_RegisterEnumValue)) +#define R29 ((Register)(r29_RegisterEnumValue)) +#define R30 ((Register)(r30_RegisterEnumValue)) +#define R31 ((Register)(r31_RegisterEnumValue)) + + +#define RA R1 +#define TP R2 +#define SP R3 +#define RA0 R4 +#define RA1 R5 +#define RA2 R6 +#define RA3 R7 +#define RA4 R8 +#define RA5 R9 +#define RA6 R10 +#define RA7 R11 +#define RT0 R12 +#define RT1 R13 +#define RT2 R14 +#define RT3 R15 +#define RT4 R16 +#define RT5 R17 +#define RT6 R18 +#define RT7 R19 +#define RT8 R20 +#define RX R21 +#define FP R22 +#define S0 R23 +#define S1 R24 +#define S2 R25 +#define S3 R26 +#define S4 R27 +#define S5 R28 +#define S6 R29 +#define S7 R30 +#define S8 R31 + +#define c_rarg0 RT0 +#define c_rarg1 RT1 +#define Rmethod S3 +#define Rsender S4 +#define Rnext S1 + +#define V0 RA0 +#define V1 RA1 + +#define SCR1 RT7 +#define SCR2 RT4 + +//for interpreter frame +// bytecode pointer register +#define BCP S0 +// local variable pointer register +#define LVP S7 +// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM +// be sure to save and restore its value in call_stub +#define TSR S2 + +//OPT_SAFEPOINT not supported yet +#define OPT_SAFEPOINT 1 + +#define OPT_THREAD 1 + +#define TREG S6 + +#define S5_heapbase S5 + +#define FSR V0 +#define SSR T6 +#define FSF FV0 + +#define RECEIVER T0 +#define IC_Klass T1 + +#define SHIFT_count T3 + +// ---------- Scratch Register ---------- +#define AT RT7 +#define fscratch F23 + +#endif // DONT_USE_REGISTER_DEFINES + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the LoongArch architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + save_slots_per_register = 2, + slots_per_lsx_register = 4, + slots_per_lasx_register = 8, + 
max_slots_per_register = 8 + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) +#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) +#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) +#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) +#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) +#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) +#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) +#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) +#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) +#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) +#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) +#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) +#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) +#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) +#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) +#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) +#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) +#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) 
+#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) +#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) +#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) +#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) +#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) +#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) +#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) +#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) +#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) +#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) +#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) +#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) +#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) +#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) +#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) + +#define FA0 F0 +#define FA1 F1 +#define FA2 F2 +#define FA3 F3 +#define FA4 F4 +#define FA5 F5 +#define FA6 F6 +#define FA7 F7 + +#define FV0 F0 +#define FV1 F1 + +#define FT0 F8 +#define FT1 F9 +#define FT2 F10 +#define FT3 F11 +#define FT4 F12 +#define FT5 F13 +#define FT6 F14 +#define FT7 F15 +#define FT8 F16 +#define FT9 F17 +#define FT10 F18 +#define FT11 F19 +#define FT12 F20 +#define FT13 F21 +#define FT14 F22 +#define FT15 F23 + +#define FS0 F24 +#define FS1 F25 +#define FS2 F26 +#define FS3 F27 +#define FS4 F28 +#define FS5 F29 +#define FS6 F30 +#define FS7 F31 + +#endif // DONT_USE_REGISTER_DEFINES + +// Use ConditionalFlagRegister as shortcut +class ConditionalFlagRegisterImpl; +typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; + +inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { + return (ConditionalFlagRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the LoongArch architecture +class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { + public: + enum { +// conditionalflag_arg_base = 12, + number_of_registers = 8 + }; + + // construction + inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) +#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) +#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) +#define FCC2 ((ConditionalFlagRegister)( fcc2_ConditionalFlagRegisterEnumValue)) 
+#define FCC3 ((ConditionalFlagRegister)( fcc3_ConditionalFlagRegisterEnumValue))
+#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue))
+#define FCC5 ((ConditionalFlagRegister)( fcc5_ConditionalFlagRegisterEnumValue))
+#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue))
+#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue))
+
+#endif // DONT_USE_REGISTER_DEFINES
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+  enum {
+    // A big enough number for C2: all the registers plus flags
+    // This number must be large enough to cover REG_COUNT (defined by c2) registers.
+    // There is no requirement that any ordering here matches any ordering c2 gives
+    // its optoregs.
+    number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
+                          FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers
+  };
+
+  static const int max_gpr;
+  static const int max_fpr;
+};
+
+#endif //CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP
diff --git a/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp
new file mode 100644
index 00000000000..bf4498dc62c
--- /dev/null
+++ b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/relocInfo.hpp" +#include "compiler/disassembler.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/safepoint.hpp" + + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + x += o; + typedef Assembler::WhichOperand WhichOperand; + WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop + assert(which == Assembler::disp32_operand || + which == Assembler::narrow_oop_operand || + which == Assembler::imm_operand, "format unpacks ok"); + if (which == Assembler::imm_operand) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); + } + } else if (which == Assembler::narrow_oop_operand) { + // both compressed oops and compressed classes look the same + if (Universe::heap()->is_in_reserved((oop)x)) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)oopDesc::encode_heap_oop((oop)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(oopDesc::encode_heap_oop((oop)x)), (intptr_t)(x)); + } + } else { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); + } + } + } else { + // Note: Use runtime_call_type relocations for call32_operand. + assert(0, "call32_operand not supported in LoongArch64"); + } +} + + +address Relocation::pd_call_destination(address orig_addr) { + NativeInstruction* ni = nativeInstruction_at(addr()); + if (ni->is_far_call()) { + return nativeFarCall_at(addr())->destination(orig_addr); + } else if (ni->is_call()) { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + return nativeCallTrampolineStub_at(trampoline)->destination(); + } else { + address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); + // If call is branch to self, don't try to relocate it, just leave it + // as branch to self. This happens during code generation if the code + // buffer expands. It will be relocated to the trampoline above once + // code generation is complete. + return (new_addr == orig_addr) ? 
addr() : new_addr; + } + } else if (ni->is_jump()) { + return nativeGeneralJump_at(addr())->jump_destination(orig_addr); + } else { + tty->print_cr("\nError!\ncall destination: 0x%lx", p2i(addr())); + Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); + ShouldNotReachHere(); + return NULL; + } +} + +void Relocation::pd_set_call_destination(address x) { + NativeInstruction* ni = nativeInstruction_at(addr()); + if (ni->is_far_call()) { + nativeFarCall_at(addr())->set_destination(x); + } else if (ni->is_call()) { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + nativeCall_at(addr())->set_destination_mt_safe(x, false); + } else { + nativeCall_at(addr())->set_destination(x); + } + } else if (ni->is_jump()) { + nativeGeneralJump_at(addr())->set_jump_destination(x); + } else { + ShouldNotReachHere(); + } +} + +address* Relocation::pd_address_in_code() { + return (address*)addr(); +} + +address Relocation::pd_get_address_from_code() { + NativeMovConstReg* ni = nativeMovConstReg_at(addr()); + return (address)ni->data(); +} + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void metadata_Relocation::pd_fix_value(address x) { +} diff --git a/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp new file mode 100644 index 00000000000..211242f3fb1 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Since LoongArch instructions are whole words, + // the two low-order offset bits can always be discarded. + offset_unit = 4, + + // imm_oop_operand vs. narrow_oop_operand + format_width = 2 + }; + +#endif // CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp new file mode 100644 index 00000000000..e6ee65f3672 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "classfile/systemDictionary.hpp" +#include "code/vmreg.hpp" +#include "interpreter/interpreter.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_loongarch.inline.hpp" +#endif + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +//-------------- generate_exception_blob ----------- +// creates _exception_blob. +// The exception blob is jumped to from a compiled method. +// (see emit_exception_handler in sparc.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jump, and left with a jump. +// +// Arguments: +// V0: exception oop +// V1: exception pc +// +// Results: +// A0: exception oop +// A1: exception pc in caller or ??? +// jumps to: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// +// [stubGenerator_loongarch_64.cpp] generate_forward_exception() +// |- V0, V1 are created +// |- T4 <= SharedRuntime::exception_handler_for_return_address +// `- jr T4 +// `- the caller's exception_handler +// `- jr OptoRuntime::exception_blob +// `- here +// +void OptoRuntime::generate_exception_blob() { + // Capture info about frame layout + enum layout { + fp_off, + return_off, // slot for return address + framesize + }; + + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer("exception_blob", 5120, 5120); + MacroAssembler* masm = new MacroAssembler(&buffer); + + address start = __ pc(); + + __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! + + // this frame will be treated as the original caller method. 
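+  // A sketch of the two-word frame just built by the addi_d above (framesize == 2):
+  //
+  //   SP + return_off * wordSize (+8)  ->  return pc slot -- filled with V1 below
+  //   SP + fp_off * wordSize     (+0)  ->  saved FP       -- filled just after
+  //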
+ // So, the return pc should be filled with the original exception pc. + // ref: X86's implementation + __ st_d(V1, SP, return_off * wordSize); // return address + __ st_d(FP, SP, fp_off * wordSize); + + // Save callee saved registers. None for UseSSE=0, + // floats-only for UseSSE=1, and doubles for UseSSE=2. + + __ addi_d(FP, SP, fp_off * wordSize); + + // Store exception in Thread object. We cannot pass any arguments to the + // handle_exception call, since we do not want to make any assumption + // about the size of the frame where the exception happened in. + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); + __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); + + // This call does all the hard work. It checks if an exception handler + // exists in the method. + // If so, it returns the handler address. + // If not, it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. + Label L; + address the_pc = __ pc(); + __ bind(L); + __ set_last_Java_frame(thread, NOREG, NOREG, L); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ move(A0, thread); + // TODO: confirm reloc + __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + + oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(thread, true); + + // Pop self-frame. + __ leave(); // Epilog! + + // V0: exception handler + + // We have a handler in V0, (could be deopt blob) + __ move(T4, V0); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // Get the exception + __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); + // Get the exception pc in case we are deoptimized + __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); +#ifdef ASSERT + __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); + __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); +#endif + // Clear the exception oop so GC no longer processes it as a root. + __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); + + // Fix seg fault when running: + // Eclipse + Plugin + Debug As + // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() + // + __ move(V0, A0); + __ move(V1, A1); + + // V0: exception oop + // T4: exception handler + // A1: exception pc + __ jr(T4); + + // make sure all code is generated + masm->flush(); + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); +} diff --git a/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp new file mode 100644 index 00000000000..36786b53bd4 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp @@ -0,0 +1,3453 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/compiledICHolder.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_loongarch.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +#include + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + +class RegisterSaver { + // Capture info about frame layout + enum layout { + fpr0_off = 0, + fpr1_off, + fpr2_off, + fpr3_off, + fpr4_off, + fpr5_off, + fpr6_off, + fpr7_off, + fpr8_off, + fpr9_off, + fpr10_off, + fpr11_off, + fpr12_off, + fpr13_off, + fpr14_off, + fpr15_off, + fpr16_off, + fpr17_off, + fpr18_off, + fpr19_off, + fpr20_off, + fpr21_off, + fpr22_off, + fpr23_off, + fpr24_off, + fpr25_off, + fpr26_off, + fpr27_off, + fpr28_off, + fpr29_off, + fpr30_off, + fpr31_off, + a0_off, + a1_off, + a2_off, + a3_off, + a4_off, + a5_off, + a6_off, + a7_off, + t0_off, + t1_off, + t2_off, + t3_off, + t4_off, + t5_off, + t6_off, + t7_off, + t8_off, + s0_off, + s1_off, + s2_off, + s3_off, + s4_off, + s5_off, + s6_off, + s7_off, + s8_off, + fp_off, + ra_off, + fpr_size = fpr31_off - fpr0_off + 1, + gpr_size = ra_off - a0_off + 1, + }; + + const bool _save_vectors; + public: + RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} + + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); + + int slots_save() { + int slots = gpr_size * VMRegImpl::slots_per_word; + + if (_save_vectors && UseLASX) + slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; + else if (_save_vectors && UseLSX) + slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; + else + slots += FloatRegisterImpl::save_slots_per_register * fpr_size; + + return slots; + } + + int gpr_offset(int off) { + int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; + int slots_per_gpr = VMRegImpl::slots_per_word; + + if (_save_vectors && UseLASX) + slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; + else if (_save_vectors && UseLSX) + slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; + + return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; + 
} + + int fpr_offset(int off) { + int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; + + if (_save_vectors && UseLASX) + slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; + else if (_save_vectors && UseLSX) + slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; + + return off * slots_per_fpr * VMRegImpl::stack_slot_size; + } + + int ra_offset() { return gpr_offset(ra_off); } + int t5_offset() { return gpr_offset(t5_off); } + int s3_offset() { return gpr_offset(s3_off); } + int v0_offset() { return gpr_offset(a0_off); } + int v1_offset() { return gpr_offset(a1_off); } + + int fpr0_offset() { return fpr_offset(fpr0_off); } + int fpr1_offset() { return fpr_offset(fpr1_off); } + + // During deoptimization only the result register need to be restored + // all the other values have already been extracted. + void restore_result_registers(MacroAssembler* masm); +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { + + // Always make the frame size 16-byte aligned + int frame_size_in_bytes = round_to(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; + // The caller will allocate additional_frame_words + int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; + // CodeBlob frame size is in words. + int frame_size_in_words = frame_size_in_bytes / wordSize; + + *total_frame_words = frame_size_in_words; + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap(frame_size_in_slots, 0); + + // save registers + __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); + + for (int i = 0; i < fpr_size; i++) { + FloatRegister fpr = as_FloatRegister(i); + int off = fpr_offset(i); + + if (_save_vectors && UseLASX) + __ xvst(fpr, SP, off); + else if (_save_vectors && UseLSX) + __ vst(fpr, SP, off); + else + __ fst_d(fpr, SP, off); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); + } + + for (int i = a0_off; i <= a7_off; i++) { + Register gpr = as_Register(A0->encoding() + (i - a0_off)); + int off = gpr_offset(i); + + __ st_d(gpr, SP, gpr_offset(i)); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); + } + + for (int i = t0_off; i <= t6_off; i++) { + Register gpr = as_Register(T0->encoding() + (i - t0_off)); + int off = gpr_offset(i); + + __ st_d(gpr, SP, gpr_offset(i)); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); + } + __ st_d(T8, SP, gpr_offset(t8_off)); + map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); + + for (int i = s0_off; i <= s8_off; i++) { + Register gpr = as_Register(S0->encoding() + (i - s0_off)); + int off = gpr_offset(i); + + __ st_d(gpr, SP, gpr_offset(i)); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); + } + + __ st_d(FP, SP, gpr_offset(fp_off)); + map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); + __ st_d(RA, SP, gpr_offset(ra_off)); + 
map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size + additional_frame_slots), RA->as_VMReg()); + + __ addi_d(FP, SP, gpr_offset(fp_off)); + + return map; +} + + +// Pop the current frame and restore all the registers that we +// saved. +void RegisterSaver::restore_live_registers(MacroAssembler* masm) { + for (int i = 0; i < fpr_size; i++) { + FloatRegister fpr = as_FloatRegister(i); + int off = fpr_offset(i); + + if (_save_vectors && UseLASX) + __ xvld(fpr, SP, off); + else if (_save_vectors && UseLSX) + __ vld(fpr, SP, off); + else + __ fld_d(fpr, SP, off); + } + + for (int i = a0_off; i <= a7_off; i++) { + Register gpr = as_Register(A0->encoding() + (i - a0_off)); + int off = gpr_offset(i); + + __ ld_d(gpr, SP, gpr_offset(i)); + } + + for (int i = t0_off; i <= t6_off; i++) { + Register gpr = as_Register(T0->encoding() + (i - t0_off)); + int off = gpr_offset(i); + + __ ld_d(gpr, SP, gpr_offset(i)); + } + __ ld_d(T8, SP, gpr_offset(t8_off)); + + for (int i = s0_off; i <= s8_off; i++) { + Register gpr = as_Register(S0->encoding() + (i - s0_off)); + int off = gpr_offset(i); + + __ ld_d(gpr, SP, gpr_offset(i)); + } + + __ ld_d(FP, SP, gpr_offset(fp_off)); + __ ld_d(RA, SP, gpr_offset(ra_off)); + + __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); +} + +// Pop the current frame and restore the registers that might be holding +// a result. +void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + // Just restore result register. Only used by deoptimization. By + // now any callee save register that needs to be restore to a c2 + // caller of the deoptee has been extracted into the vframeArray + // and will be stuffed into the c2i adapter we create for later + // restoration so only result registers need to be restored here. + + __ ld_d(V0, SP, gpr_offset(a0_off)); + __ ld_d(V1, SP, gpr_offset(a1_off)); + + __ fld_d(F0, SP, fpr_offset(fpr0_off)); + __ fld_d(F1, SP, fpr_offset(fpr1_off)); + + __ addi_d(SP, SP, gpr_offset(ra_off)); +} + +// Is vector's size (in bytes) bigger than a size saved by default? +// 8 bytes registers are saved by default using fld/fst instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 8; +} + +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. + +static int reg2offset_in(VMReg r) { + // Account for saved fp and return address + // This should really be in_preserve_stack_slots + return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); +} + +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than SharedInfo::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register +// up to RegisterImpl::number_of_registers) are the 32-bit +// integer registers. 
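+
+// As a worked example of the convention implemented by java_calling_convention
+// below (a sketch only -- the receiver, when present, is simply the first
+// T_OBJECT in sig_bt), a signature (Object, long, double, int) is assigned:
+//
+//   Object -> T0    (1st oop/int slot)
+//   long   -> A0    (2nd oop/int slot, set2)
+//   double -> FA0   (1st float slot, set2)
+//   int    -> A1    (3rd oop/int slot, set1)
+//
+// Once T0, A0-A7 (or FA0-FA7 for float/double) are used up, values fall back to
+// 4-byte stack-slot pairs, i.e. stk_args advances by 2 per value.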
+ +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +// Note: the INPUTS in sig_bt are in units of Java argument words, which are +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit +// units regardless of build. + + +// --------------------------------------------------------------------------- +// The compiled Java calling convention. +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + + // Create the mapping between argument positions and registers. + static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { + T0, A0, A1, A2, A3, A4, A5, A6, A7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 + }; + + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_register_parameters + 1) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (int_args < Argument::n_register_parameters + 1) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { + Label L; + __ verify_oop(Rmethod); + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, L); + // Schedule the branch target address early. 
+ // Call into the VM to patch the caller, then jump to compiled callee + // T5 isn't live so capture return address while we easily can + __ move(T5, RA); + + __ pushad(); +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // VM needs caller's callsite + // VM needs target method + + __ move(A0, Rmethod); + __ move(A1, T5); + // we should preserve the return address + __ verify_oop(Rmethod); + __ move(S0, SP); + __ li(AT, -(StackAlignmentInBytes)); // align the stack + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), + relocInfo::runtime_call_type); + + __ move(SP, S0); + __ popad(); + __ bind(L); +} + +static void gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup) { + + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + // However we will run interpreted if we come thru here. The next pass + // thru the call site will run compiled. If we ran compiled here then + // we can (theorectically) do endless i2c->c2i->i2c transitions during + // deopt/uncommon trap cycles. If we always go interpreted here then + // we can have at most one and don't need to play any tricks to keep + // from endlessly growing the stack. + // + // Actually if we detected that we had an i2c->c2i transition here we + // ought to be able to reset the world back to the state of the interpreted + // call and not bother building another interpreter arg area. We don't + // do that at this point. + + patch_callers_callsite(masm); + __ bind(skip_fixup); + +#ifdef COMPILER2 + __ empty_FPU_stack(); +#endif + //this is for native ? + // Since all args are passed on the stack, total_args_passed * interpreter_ + // stack_element_size is the + // space we need. + int extraspace = total_args_passed * Interpreter::stackElementSize; + + // stack is aligned, keep it that way + extraspace = round_to(extraspace, 2*wordSize); + + // Get return address + __ move(T5, RA); + // set senderSP value + //refer to interpreter_loongarch.cpp:generate_asm_entry + __ move(Rsender, SP); + __ addi_d(SP, SP, -extraspace); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // st_off points to lowest address on stack. 
+ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; + // Say 4 args: + // i st_off + // 0 12 T_LONG + // 1 8 T_VOID + // 2 4 T_OBJECT + // 3 0 T_BOOL + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use fpu stack top + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + __ ld_ptr(AT, Address(SP, ld_off)); + __ st_ptr(AT, Address(SP, st_off)); + + } else { + + + int next_off = st_off - Interpreter::stackElementSize; + __ ld_ptr(AT, Address(SP, ld_off)); + __ st_ptr(AT, Address(SP, st_off)); + + // Ref to is_Register condition + if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ st_ptr(AT, SP, st_off - 8); + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + __ st_d(r, SP, st_off); + } else { + //FIXME, LA will not enter here + // long/double in gpr + __ st_d(r, SP, st_off); + // In [java/util/zip/ZipFile.java] + // + // private static native long open(String name, int mode, long lastModified); + // private static native int getTotal(long jzfile); + // + // We need to transfer T_LONG paramenters from a compiled method to a native method. + // It's a complex process: + // + // Caller -> lir_static_call -> gen_resolve_stub + // -> -- resolve_static_call_C + // `- gen_c2i_adapter() [*] + // | + // `- AdapterHandlerLibrary::get_create_apapter_index + // -> generate_native_entry + // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] + // + // In [**], T_Long parameter is stored in stack as: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // However, the sequence is reversed here: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | | + // (low) + // + // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). + // + if (sig_bt[i] == T_LONG) + __ st_d(r, SP, st_off - 8); + } + } else if (r_1->is_FloatRegister()) { + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ fst_s(fr, SP, st_off); + else { + __ fst_d(fr, SP, st_off); + __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots + } + } + } + + // Schedule the branch target address early. + __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); + // And repush original return address + __ move(RA, T5); + __ jr (AT); +} + +static void gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + + // Generate an I2C adapter: adjust the I-frame to make space for the C-frame + // layout. Lesp was saved by the calling I-frame and will be restored on + // return. Meanwhile, outgoing arg space is all owned by the callee + // C-frame, so we can mangle it at will. After adjusting the frame size, + // hoist register arguments and repack other args according to the compiled + // code convention. Finally, end in a jump to the compiled code. The entry + // point address is the start of the buffer. + + // We will only enter here from an interpreted frame and never from after + // passing thru a c2i. 
Azul allowed this but we do not. If we lose the + // race and use a c2i we will remain interpreted for the race loser(s). + // This removes all sorts of headaches on the LA side and also eliminates + // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. + + __ move(T4, SP); + + // Cut-out for having no stack args. Since up to 2 int/oop args are passed + // in registers, we will occasionally have no stack args. + int comp_words_on_stack = 0; + if (comp_args_on_stack) { + // Sig words on the stack are greater-than VMRegImpl::stack0. Those in + // registers are below. By subtracting stack0, we either get a negative + // number (all values in registers) or the maximum stack slot accessed. + // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); + // Convert 4-byte stack slots to words. + // did LA need round? FIXME + comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; + // Round up to miminum stack alignment, in wordSize + comp_words_on_stack = round_to(comp_words_on_stack, 2); + __ addi_d(SP, SP, -comp_words_on_stack * wordSize); + } + + // Align the outgoing SP + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + // push the return address on the stack (note that pushing, rather + // than storing it, yields the correct frame alignment for the callee) + // Put saved SP in another register + const Register saved_sp = T5; + __ move(saved_sp, T4); + + + // Will jump to the compiled code just as if compiled code was doing it. + // Pre-load the register-jump target early, to schedule it better. + __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); + + // Now generate the shuffle code. Pick up all register args and move the + // rest through the floating point stack top. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from SP+offset. + + assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; + // Point to interpreter value (vs. tag) + int next_off = ld_off - Interpreter::stackElementSize; + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to + // account for return address ) + // NOTICE HERE!!!! I sub a wordSize here + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + //+ wordSize; + + if (!r_2->is_valid()) { + __ ld_d(AT, saved_sp, ld_off); + __ st_d(AT, SP, st_off); + } else { + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + // st_off is LSW (i.e. reg.first()) + + // [./org/eclipse/swt/graphics/GC.java] + // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, + // int destX, int destY, int destWidth, int destHeight, + // boolean simple, + // int imgWidth, int imgHeight, + // long maskPixmap, <-- Pass T_LONG in stack + // int maskType); + // Before this modification, Eclipse displays icons with solid black background. 
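+          // For a two-slot value the interpreter keeps the payload in the
+          // lower-addressed slot and a dummy T_VOID slot above it, so the
+          // conditional reload below fetches from (ld_off - 8) before AT is
+          // stored into the compiled frame.  Illustrative sketch only; the
+          // helper name is hypothetical and not part of this file:
+          //
+          //   int interpreter_payload_off(BasicType bt, int ld_off) {
+          //     return (bt == T_LONG || bt == T_DOUBLE) ? ld_off - 8 : ld_off;
+          //   }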
+ // + __ ld_d(AT, saved_sp, ld_off); + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ ld_d(AT, saved_sp, ld_off - 8); + __ st_d(AT, SP, st_off); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + if (r_2->is_valid()) { + // Remember r_1 is low address (and LSB on LA) + // So r_2 gets loaded from high address regardless of the platform + assert(r_2->as_Register() == r_1->as_Register(), ""); + __ ld_d(r, saved_sp, ld_off); + + // + // For T_LONG type, the real layout is as below: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // We should load the low-8 bytes. + // + if (sig_bt[i] == T_LONG) + __ ld_d(r, saved_sp, ld_off - 8); + } else { + __ ld_w(r, saved_sp, ld_off); + } + } else if (r_1->is_FloatRegister()) { // Float Register + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ fld_s(fr, saved_sp, ld_off); + else { + __ fld_d(fr, saved_sp, ld_off); + __ fld_d(fr, saved_sp, ld_off - 8); + } + } + } + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. + __ get_thread(T8); + __ st_d(Rmethod, T8, in_bytes(JavaThread::callee_target_offset())); + + // move methodOop to T5 in case we end up in an c2i adapter. + // the c2i adapters expect methodOop in T5 (c2) because c2's + // resolve stubs return the result (the method) in T5. + // I'd love to fix this. + __ move(T5, Rmethod); + __ jr(T4); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry = __ pc(); + + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know G5 holds the methodOop. The + // args start out packed in the compiled layout. They need to be unpacked + // into the interpreter layout. This will almost always require some stack + // space. We grow the current (compiled) stack, then repack the args. We + // finally end in a jump to the generic interpreter entry point. On exit + // from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relys solely on SP and not FP, get sick). 
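+  // Rough layout of the adapter blob emitted by this function (sketch, for
+  // reference):
+  //
+  //   i2c_entry:            gen_i2c_adapter()            (emitted above)
+  //   c2i_unverified_entry: inline-cache check; jumps to the ic_miss stub on
+  //                         a mismatch, otherwise falls through to c2i_entry
+  //                         (or branches straight to skip_fixup when the
+  //                         callee has no compiled code yet)
+  //   c2i_entry:            gen_c2i_adapter() with skip_fixup bound after
+  //                         patch_callers_callsite()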
+ + address c2i_unverified_entry = __ pc(); + Label skip_fixup; + { + Register holder = T1; + Register receiver = T0; + Register temp = T8; + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + Label missed; + + __ verify_oop(holder); + //add for compressedoops + __ load_klass(temp, receiver); + __ verify_oop(temp); + + __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); + __ bne(AT, temp, missed); + // Method might have been compiled since the call site was patched to + // interpreted if that is the case treat it as a miss so we can get + // the call site corrected. + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, skip_fixup); + __ bind(missed); + + __ jmp(ic_miss, relocInfo::runtime_call_type); + } + address c2i_entry = __ pc(); + + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on LA"); + // Return the number of VMReg stack_slots needed for the args. + // This value does not include an abi space (like register window + // save area). + + // We return the amount of VMReg stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. Since we always + // have space for storing at least 6 registers to memory we start with that. + // See int_stk_helper for a further discussion. + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. 
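+  // On LoongArch64 the first eight integer/pointer arguments travel in the
+  // A-registers and the first eight float/double arguments in the
+  // FA-registers listed below.  A float/double that finds no free FP register
+  // is tried in an integer register next and only then spilled to the stack;
+  // every spilled value costs two 32-bit stack slots (stk_args += 2).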
+ static const Register INT_ArgReg[Argument::n_register_parameters] = { + A0, A1, A2, A3, A4, A5, A6, A7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 + }; + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; // inc by 2 each time + +// Example: +// n java.lang.UNIXProcess::forkAndExec +// private native int forkAndExec(byte[] prog, +// byte[] argBlock, int argc, +// byte[] envBlock, int envc, +// byte[] dir, +// boolean redirectErrorStream, +// FileDescriptor stdin_fd, +// FileDescriptor stdout_fd, +// FileDescriptor stderr_fd) +// JNIEXPORT jint JNICALL +// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, +// jobject process, +// jbyteArray prog, +// jbyteArray argBlock, jint argc, +// jbyteArray envBlock, jint envc, +// jbyteArray dir, +// jboolean redirectErrorStream, +// jobject stdin_fd, +// jobject stdout_fd, +// jobject stderr_fd) +// +// ::c_calling_convention +// 0: // env <-- a0 +// 1: L // klass/obj <-- t0 => a1 +// 2: [ // prog[] <-- a0 => a2 +// 3: [ // argBlock[] <-- a1 => a3 +// 4: I // argc <-- a2 => a4 +// 5: [ // envBlock[] <-- a3 => a5 +// 6: I // envc <-- a4 => a5 +// 7: [ // dir[] <-- a5 => a7 +// 8: Z // redirectErrorStream <-- a6 => sp[0] +// 9: L // stdin <-- a7 => sp[8] +// 10: L // stdout fp[16] => sp[16] +// 11: L // stderr fp[24] => sp[24] +// + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (int_args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + } else if (int_args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + } else if (int_args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// --------------------------------------------------------------------------- +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ fst_s(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ fst_d(FSF, FP, -wordSize ); + break; + case T_VOID: break; + case T_LONG: + __ st_d(V0, FP, -wordSize); 
+ break; + case T_OBJECT: + case T_ARRAY: + __ st_d(V0, FP, -wordSize); + break; + default: { + __ st_w(V0, FP, -wordSize); + } + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ fld_s(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ fld_d(FSF, FP, -wordSize ); + break; + case T_LONG: + __ ld_d(V0, FP, -wordSize); + break; + case T_VOID: break; + case T_OBJECT: + case T_ARRAY: + __ ld_d(V0, FP, -wordSize); + break; + default: { + __ ld_w(V0, FP, -wordSize); + } + } +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + __ push(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ push(args[i].first()->as_FloatRegister()); + } + } +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + __ pop(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ pop(args[i].first()->as_FloatRegister()); + } + } +} + +// A simple move of integer like type +static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld_w(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else { + // stack to reg + __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + if (dst.first() != src.first()){ + __ move(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// An oop arg. Must pass a handle not the oop itself +static void object_move(MacroAssembler* masm, + OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset) { + + // must pass a handle. 
First figure out the location we use as a handle + + if (src.first()->is_stack()) { + // Oop is already on the stack as an argument + Register rHandle = T5; + Label nil; + __ xorr(rHandle, rHandle, rHandle); + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ beq(AT, R0, nil); + __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); + __ bind(nil); + if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); + else __ move( (dst.first())->as_Register(), rHandle); + + int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + } else { + // Oop is in an a register we must store it to the space we reserve + // on the stack for oop_handles + const Register rOop = src.first()->as_Register(); + assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); + const Register rHandle = T5; + //Important: refer to java_calling_convertion + int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot*VMRegImpl::stack_slot_size; + Label skip; + __ st_d( rOop , SP, offset ); + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + __ xorr( rHandle, rHandle, rHandle); + __ beq(rOop, R0, skip); + __ lea(rHandle, Address(SP, offset)); + __ bind(skip); + // Store the handle parameter + if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); + else __ move((dst.first())->as_Register(), rHandle); + + if (is_receiver) { + *receiver_offset = offset; + } + } +} + +// A float arg may have to do float reg int reg conversion +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); + if (src.first()->is_stack()) { + // stack to stack/reg + if (dst.first()->is_stack()) { + __ ld_w(AT, FP, reg2offset_in(src.first())); + __ st_w(AT, SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); + } else { + __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + // reg to stack/reg + if(dst.first()->is_stack()) { + __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } else { + __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); + } + } +} + +// A long move +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The only legal possibility for a long_move VMRegPair is: + // 1: two stack slots (possibly unaligned) + // as neither the java or C calling convention will use registers + // for longs. 
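+  // Note: on this LP64 port c_calling_convention() above does hand out
+  // A-registers for T_LONG, so the register-to-stack and register-to-register
+  // branches below are reachable as well, not just the stack-to-stack case
+  // described above.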
+ if (src.first()->is_stack()) { + assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); + if( dst.first()->is_stack()){ + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else { + __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + if( dst.first()->is_stack()){ + __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + __ move(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// A double move +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The only legal possibilities for a double_move VMRegPair are: + // The painful thing here is that like long_move a VMRegPair might be + + // Because of the calling convention we know that src is either + // 1: a single physical register (xmm registers only) + // 2: two stack slots (possibly unaligned) + // dst can only be a pair of stack slots. + + if (src.first()->is_stack()) { + // source is all stack + if( dst.first()->is_stack()){ + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); + } else { + __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + // reg to stack/reg + // No worries about stack alignment + if( dst.first()->is_stack()){ + __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } else { + __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); + } + } +} + +static void verify_oop_args(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + Register temp_reg = T4; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (sig_bt[i] == T_OBJECT || + sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = S3; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal(err_msg_res("unexpected intrinsic id %d", iid)); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. 
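+    // If the trailing MemberName argument arrived on the stack it is loaded
+    // into S3; if it already sits in a register, that register is simply
+    // adopted as member_reg and no move is emitted.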
+ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. + fatal("receiver always in a register"); + receiver_reg = SSR; // known to be free at this point + __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// convention (handlizes oops, etc), transitions to native, makes the call, +// returns to java state (possibly blocking), unhandlizes any result and +// returns. +nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + methodHandle method, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, + BasicType ret_type) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); + int vep_offset = ((intptr_t)__ pc()) - start; + gen_special_dispatch(masm, + method, + in_sig_bt, + in_regs); + assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, + "valid size for make_non_entrant"); + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period + __ flush(); + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet*)NULL); + } + + bool is_critical_native = true; + address native_func = method->critical_native_function(); + if (native_func == NULL) { + native_func = method->native_function(); + is_critical_native = false; + } + assert(native_func != NULL, "must have function"); + + // Native nmethod wrappers never take possesion of the oop arguments. + // So the caller will gc the arguments. The only thing we need an + // oopMap for is if the call is static + // + // An OopMap for lock (and class if static), and one for the VM call itself + OopMapSet *oop_maps = new OopMapSet(); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. 
To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args; + if (!is_critical_native) { + total_c_args += 1; + if (method->is_static()) { + total_c_args++; + } + } else { + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args++; + } + } + } + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + Symbol* atype = ss.as_symbol(CHECK_NULL); + const char* at = atype->as_C_string(); + if (strlen(at) == 2) { + assert(at[0] == '[', "must be"); + switch (at[1]) { + case 'B': in_elem_bt[i] = T_BYTE; break; + case 'C': in_elem_bt[i] = T_CHAR; break; + case 'D': in_elem_bt[i] = T_DOUBLE; break; + case 'F': in_elem_bt[i] = T_FLOAT; break; + case 'I': in_elem_bt[i] = T_INT; break; + case 'J': in_elem_bt[i] = T_LONG; break; + case 'S': in_elem_bt[i] = T_SHORT; break; + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; + default: ShouldNotReachHere(); + } + } + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + in_elem_bt[i] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type(), "must match"); + ss.next(); + } + } + } + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + // + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Compute framesize for the wrapper. We need to handlize all oops in + // registers. We must create space for them here that is disjoint from + // the windowed save area because we have no control over when we might + // flush the window again and overwrite values that gc has since modified. + // (The live window race) + // + // We always just allocate 6 word for storing down these object. This allow + // us to simply record the base and use the Ireg number to decide which + // slot to use. (Note that the reg number is the inbound number not the + // outbound number). + // We must shuffle args to match the native convention, and include var-args space. + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. 
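+    // The loop below sizes that save area from the signature: each register
+    // argument contributes one slot for 32-bit kinds and two slots for
+    // T_LONG/T_DOUBLE/T_ARRAY, i.e.
+    //
+    //   total_save_slots = double_slots * 2 + single_slots;
+    //
+    // and stack_slots is rounded to an even count whenever a 2-slot value is
+    // present so the doubles stay 8-byte aligned.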
+ int double_slots = 0; + int single_slots = 0; + for ( int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: single_slots++; break; + case T_ARRAY: + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + switch (in_sig_bt[i]) { + case T_FLOAT: single_slots++; break; + case T_DOUBLE: double_slots++; break; + default: ShouldNotReachHere(); + } + } + } + total_save_slots = double_slots * 2 + single_slots; + // align the save area + if (double_slots != 0) { + stack_slots = round_to(stack_slots, 2); + } + } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + // Now a place to save return value or as a temporary for any gpr -> fpr moves + // + 2 for return address (which we own) and saved fp + stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | 2 slots for moves | + // |---------------------| + // | lock box (if sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | vararg area | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, StackAlignmentInSlots); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + intptr_t start = (intptr_t)__ pc(); + + + + // First thing make an ic check to see if we should even be here + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + // We are free to use all registers as temps without saving them and + // restoring them except fp. fp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. + + //refer to register_loongarch.hpp:IC_Klass + const Register ic_reg = T1; + const Register receiver = T0; + + Label hit; + Label exception_pending; + + __ verify_oop(receiver); + //add for compressedoops + __ load_klass(T4, receiver); + __ beq(T4, ic_reg, hit); + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ bind(hit); + + int vep_offset = ((intptr_t)__ pc()) - start; + + // Generate stack overflow check + if (UseStackBanging) { + __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); + } + + // The instruction at the verified entry point must be 4 bytes or longer + // because it can be patched on the fly by make_non_entrant. 
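+  // LoongArch instructions are a fixed 4 bytes (BytesPerInstWord), so when
+  // nothing has been emitted after the verified entry point the single nop
+  // below provides the patch window required above.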
+ if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { + __ nop(); + } + + // Generate a new frame for the wrapper. + // do LA need this ? +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ enter(); + // -2 because return address is already present and so is saved fp + __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + + // Calculate the difference between sp and fp. We need to know it + // after the native call because on windows Java Natives will pop + // the arguments and it is painful to do sp relative addressing + // in a platform independent way. So after the call we switch to + // fp relative addressing. + //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change + //the SP + int fp_adjustment = stack_size - 2*wordSize; + +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // Compute the fp offset for any slots used after the jni call + + int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; + // We use TREG as a thread pointer because it is callee save and + // if we load it once it is usable thru the entire wrapper + const Register thread = TREG; + + // We use S4 as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + const Register oop_handle_reg = S4; + if (is_critical_native) { + Unimplemented(); + // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, + // oop_handle_offset, oop_maps, in_regs, in_sig_bt); + } + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmpi, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + // + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* + // and, if static, the class mirror instead of a receiver. This pretty much + // guarantees that register layout will not match (and LA doesn't use reg + // parms though amd does). Since the native abi doesn't use register args + // and the java conventions does we don't have to worry about collisions. + // All of our moved are reg->stack or stack->stack. + // We ignore the extra arguments during the shuffle and handle them at the + // last moment. The shuffle is described by the two calling convention + // vectors we have in our possession. We simply walk the java vector to + // get the source locations and the c vector to get the destinations. + + int c_arg = method->is_static() ? 2 : 1 ; + + // Record sp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. 
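+  // Concretely: object_move() above records caller-frame oops at
+  // offset_in_older_frame + framesize_in_slots, i.e. at indices beyond this
+  // frame's own stack_slots, which is why the OopMap below is sized with
+  // stack_slots * 2.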
+ // + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + + // Mark location of fp (someday) + // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + // This may iterate in two different directions depending on the + // kind of native it is. The reason is that for regular JNI natives + // the incoming and outgoing registers are offset upwards and for + // critical natives they are offset down. + GrowableArray arg_order(2 * total_in_args); + VMRegPair tmp_vmreg; + tmp_vmreg.set2(T8->as_VMReg()); + + if (!is_critical_native) { + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); + } + } else { + // Compute a valid move order, using tmp_vmreg to break any cycles + Unimplemented(); + // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); + } + + int temploc = -1; + for (int ai = 0; ai < arg_order.length(); ai += 2) { + int i = arg_order.at(ai); + int c_arg = arg_order.at(ai + 1); + __ block_comment(err_msg("move %d -> %d", i, c_arg)); + if (c_arg == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // This arg needs to be moved to a temporary + __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); + in_regs[i] = tmp_vmreg; + temploc = i; + continue; + } else if (i == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // Read from the temporary location + assert(temploc != -1, "must be valid"); + i = temploc; + temploc = -1; + } +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); + } else if (in_regs[i].first()->is_FloatRegister()) { + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); + } + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif /* ASSERT */ + switch (in_sig_bt[i]) { + case T_ARRAY: + if (is_critical_native) { + Unimplemented(); + // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); + c_arg++; +#ifdef ASSERT + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif + break; + } + case T_OBJECT: + assert(!is_critical_native, "no oop arguments"); + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], + ((i == 0) && (!is_static)), + &receiver_offset); + break; + case T_VOID: + break; + + case T_FLOAT: + float_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_DOUBLE: + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + 
double_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_LONG : + long_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + simple_move32(masm, in_regs[i], out_regs[c_arg]); + } + } + + // point c_arg at the first arg that is already loaded in case we + // need to spill before we call out + c_arg = total_c_args - total_in_args; + // Pre-load a static method's oop. Used both by locking code and + // the normal JNI call code. + + __ move(oop_handle_reg, A1); + + if (method->is_static() && !is_critical_native) { + + // load opp into a register + int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( + (method->method_holder())->java_mirror())); + + + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ patchable_li52(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); + // Now handlize the static class mirror it's known not-null. + __ st_d( oop_handle_reg, SP, klass_offset); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(oop_handle_reg, Address(SP, klass_offset)); + // store the klass handle as second argument + __ move(A1, oop_handle_reg); + // and protect the arg if we must spill + c_arg--; + } + + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + // We use the same pc/oopMap repeatedly when we call out + + Label native_return; + __ set_last_Java_frame(SP, noreg, native_return); + + // We have all of the arguments setup at this point. We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? 
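+  // The DTrace method-entry probe below therefore brackets its call_VM_leaf
+  // with save_args()/restore_args() over out_regs so the already shuffled
+  // native arguments survive the runtime call.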
+ { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + save_args(masm, total_c_args, c_arg, out_regs); + int metadata_index = __ oop_recorder()->find_index(method()); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_li52(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + thread, AT); + + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // These are register definitions we need for locking/unlocking + const Register swap_reg = T8; // Must use T8 for cmpxchg instruction + const Register obj_reg = T4; // Will contain the oop + //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) + const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) + + + + Label slow_path_lock; + Label lock_done; + + // Lock a synchronized method + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ move(oop_handle_reg, A1); + + // Get address of the box + __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); + + // Load the oop from the handle + __ ld_d(obj_reg, oop_handle_reg, 0); + + if (UseBiasedLocking) { + // Note that oop_handle_reg is trashed during this call + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); + } + + // Load immediate 1 into swap_reg %T8 + __ li(swap_reg, 1); + + __ ld_d(AT, obj_reg, 0); + __ orr(swap_reg, swap_reg, AT); + + __ st_d(swap_reg, lock_reg, mark_word_offset); + __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg + + __ sub_d(swap_reg, swap_reg, SP); + __ li(AT, 3 - os::vm_page_size()); + __ andr(swap_reg , swap_reg, AT); + // Save the test result, for recursive case, the result is zero + __ st_d(swap_reg, lock_reg, mark_word_offset); + __ bne(swap_reg, R0, slow_path_lock); + // Slow path will re-enter here + __ bind(lock_done); + + if (UseBiasedLocking) { + // Re-fetch oop_handle_reg as we trashed it above + __ move(A1, oop_handle_reg); + } + } + + + // Finally just about ready to make the JNI call + + + // get JNIEnv* which is first argument to native + if (!is_critical_native) { + __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); + } + + // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) + // Load the second arguments into A1 + //__ ld(A1, SP , wordSize ); // klass + + // Now set thread in native + __ addi_d(AT, R0, _thread_in_native); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); + // do the call + __ call(native_func, relocInfo::runtime_call_type); + __ bind(native_return); + + oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); + + // WARNING - on Windows Java Natives use pascal calling convention and pop the + // arguments off of the stack. 
We could just re-adjust the stack pointer here + // and continue to do SP relative addressing but we instead switch to FP + // relative addressing. + + // Unpack native results. + switch (ret_type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : // nothing to do break; + case T_DOUBLE : + case T_FLOAT : + // Result is in st0 we'll save as needed + break; + case T_ARRAY: // Really a handle + case T_OBJECT: // Really a handle + break; // can't de-handlize until after safepoint check + case T_VOID: break; + case T_LONG: break; + default : ShouldNotReachHere(); + } + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. + __ addi_d(AT, R0, _thread_in_native_trans); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ membar(__ AnyAny); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. + __ serialize_memory(thread, T5); + } + } + + Label after_transition; + + // check for safepoint operation in progress and/or pending suspend requests + { + Label Continue; + __ li(AT, SafepointSynchronize::address_of_state()); + __ ld_w(T5, AT, 0); + __ addi_d(AT, T5, -SafepointSynchronize::_not_synchronized); + Label L; + __ bne(AT, R0, L); + __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // + save_native_result(masm, ret_type, stack_slots); + __ move(A0, thread); + __ addi_d(SP, SP, -wordSize); + __ push(S2); + __ li(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + if (!is_critical_native) { + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); + } else { + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); + } + __ move(SP, S2); // use S2 as a sender SP holder + __ pop(S2); + __ addi_d(SP, SP, wordSize); + //add for compressedoops + __ reinit_heapbase(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + + if (is_critical_native) { + // The call above performed the transition to thread_in_Java so + // skip the transition logic below. 
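+      // (beq R0, R0, after_transition always takes the branch: R0 is the
+      // hardwired zero register, so this is an unconditional jump.)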
+ __ beq(R0, R0, after_transition); + } + + __ bind(Continue); + } + + // change thread state + __ addi_d(AT, R0, _thread_in_Java); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); + __ bind(after_transition); + Label reguard; + Label reguard_done; + __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ addi_d(AT, AT, -JavaThread::stack_guard_yellow_disabled); + __ beq(AT, R0, reguard); + // slow path reguard re-enters here + __ bind(reguard_done); + + // Handle possible exception (will unlock if necessary) + + // native result if any is live + + // Unlock + Label slow_path_unlock; + Label unlock_done; + if (method->is_synchronized()) { + + Label done; + + // Get locked oop from the handle we passed to jni + __ ld_d( obj_reg, oop_handle_reg, 0); + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, T8, done); + + } + + // Simple recursive lock? + + __ ld_d(AT, FP, lock_slot_fp_offset); + __ beq(AT, R0, done); + // Must save FSF if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + // get old displaced header + __ ld_d (T8, FP, lock_slot_fp_offset); + // get address of the stack lock + __ addi_d (c_rarg0, FP, lock_slot_fp_offset); + // Atomic swap old header if oop still contains the stack lock + __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); + + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + + } + { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + // Tell dtrace about this method exit + save_native_result(masm, ret_type, stack_slots); + int metadata_index = __ oop_recorder()->find_index( (method())); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_li52(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + thread, AT); + restore_native_result(masm, ret_type, stack_slots); + } + + // We can finally stop using that last_Java_frame we setup ages ago + + __ reset_last_Java_frame(false); + + // Unpack oop result, e.g. JNIHandles::resolve value. + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + __ resolve_jobject(V0, thread, T4); + } + + if (!is_critical_native) { + // reset handle block + __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); + __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); + } + + if (!is_critical_native) { + // Any exception pending? + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, exception_pending); + } + // no exception, we're almost done + + // check that only result value is on FPU stack + __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); + + // Return +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ leave(); + + __ jr(RA); + // Unexpected paths are out of line and go here + // Slow path locking & unlocking + if (method->is_synchronized()) { + + // BEGIN Slow path lock + __ bind(slow_path_lock); + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + __ move(A0, obj_reg); + __ move(A1, lock_reg); + __ move(A2, thread); + __ addi_d(SP, SP, - 3*wordSize); + + __ li(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); + __ move(SP, S2); + __ addi_d(SP, SP, 3*wordSize); + + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ b(lock_done); + // END Slow path lock + + // BEGIN Slow path unlock + __ bind(slow_path_unlock); + + // Slow path unlock + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ push(AT); + __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); + + __ li(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + // should be a peal + // +wordSize because of the push above + __ addi_d(A1, FP, lock_slot_fp_offset); + + __ move(A0, obj_reg); + __ addi_d(SP,SP, -2*wordSize); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), + relocInfo::runtime_call_type); + __ addi_d(SP, SP, 2*wordSize); + __ move(SP, S2); + //add for compressedoops + __ reinit_heapbase(); +#ifdef ASSERT + { + Label L; + __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif /* ASSERT */ + + __ pop(AT); + __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); + } + __ b(unlock_done); + // END Slow path unlock + + } + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), + relocInfo::runtime_call_type); + //add for compressedoops + __ reinit_heapbase(); + restore_native_result(masm, ret_type, stack_slots); + __ b(reguard_done); + + // BEGIN EXCEPTION PROCESSING + if (!is_critical_native) { + // Forward the exception + __ bind(exception_pending); + + // remove possible return value from FPU register stack + __ empty_FPU_stack(); + + // pop our frame + //forward_exception_entry need return address on stack + __ move(SP, FP); + __ pop(FP); + + // and forward the exception + __ jmp(StubRoutines::forward_exception_entry(), 
relocInfo::runtime_call_type); + } + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + + if (is_critical_native) { + nm->set_lazy_critical_native(true); + } + return nm; +} + +#ifdef HAVE_DTRACE_H +// --------------------------------------------------------------------------- +// Generate a dtrace nmethod for a given signature. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// abi and then leaves nops at the position you would expect to call a native +// function. When the probe is enabled the nops are replaced with a trap +// instruction that dtrace inserts and the trace will cause a notification +// to dtrace. +// +// The probes are only able to take primitive types and java/lang/String as +// arguments. No other java types are allowed. Strings are converted to utf8 +// strings so that from dtrace point of view java strings are converted to C +// strings. There is an arbitrary fixed limit on the total space that a method +// can use for converting the strings. (256 chars per string in the signature). +// So any java string larger then this is truncated. + +static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; +static bool offsets_initialized = false; + +static VMRegPair reg64_to_VMRegPair(Register r) { + VMRegPair ret; + if (wordSize == 8) { + ret.set2(r->as_VMReg()); + } else { + ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); + } + return ret; +} + + +nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, + methodHandle method) { + + + // generate_dtrace_nmethod is guarded by a mutex so we are sure to + // be single threaded in this method. + assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); + + // Fill in the signature array, for the calling-convention call. + int total_args_passed = method->size_of_parameters(); + + BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); + VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); + + // The signature we are going to use for the trap that dtrace will see + // java/lang/String is converted. We drop "this" and any other object + // is converted to NULL. (A one-slot java/lang/Long object reference + // is converted to a two-slot long, which is why we double the allocation). 
+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); + + int i=0; + int total_strings = 0; + int first_arg_to_pass = 0; + int total_c_args = 0; + + // Skip the receiver as dtrace doesn't want to see it + if( !method->is_static() ) { + in_sig_bt[i++] = T_OBJECT; + first_arg_to_pass = 1; + } + + SignatureStream ss(method->signature()); + for ( ; !ss.at_return_type(); ss.next()) { + BasicType bt = ss.type(); + in_sig_bt[i++] = bt; // Collect remaining bits of signature + out_sig_bt[total_c_args++] = bt; + if( bt == T_OBJECT) { + symbolOop s = ss.as_symbol_or_null(); + if (s == vmSymbols::java_lang_String()) { + total_strings++; + out_sig_bt[total_c_args-1] = T_ADDRESS; + } else if (s == vmSymbols::java_lang_Boolean() || + s == vmSymbols::java_lang_Byte()) { + out_sig_bt[total_c_args-1] = T_BYTE; + } else if (s == vmSymbols::java_lang_Character() || + s == vmSymbols::java_lang_Short()) { + out_sig_bt[total_c_args-1] = T_SHORT; + } else if (s == vmSymbols::java_lang_Integer() || + s == vmSymbols::java_lang_Float()) { + out_sig_bt[total_c_args-1] = T_INT; + } else if (s == vmSymbols::java_lang_Long() || + s == vmSymbols::java_lang_Double()) { + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } + } else if ( bt == T_LONG || bt == T_DOUBLE ) { + in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots + // We convert double to long + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } else if ( bt == T_FLOAT) { + // We convert float to int + out_sig_bt[total_c_args-1] = T_INT; + } + } + + assert(i==total_args_passed, "validly parsed signature"); + + // Now get the compiled-Java layout as input arguments + int comp_args_on_stack; + comp_args_on_stack = SharedRuntime::java_calling_convention( + in_sig_bt, in_regs, total_args_passed, false); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the a native (non-jni) function would expect them. To figure out + // where they go we convert the java signature to a C signature and remove + // T_VOID for any long/double we might have received. + + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Plus a temp for possible converion of float/double/long register args + + int conversion_temp = stack_slots; + stack_slots += 2; + + + // Now space for the string(s) we must convert + + int string_locs = stack_slots; + stack_slots += total_strings * + (max_dtrace_string_size / VMRegImpl::stack_slot_size); + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | string[n] | + // |---------------------| <- string_locs[n] + // | string[n-1] | + // |---------------------| <- string_locs[n-1] + // | ... | + // | ... 
| + // |---------------------| <- string_locs[1] + // | string[0] | + // |---------------------| <- string_locs[0] + // | temp | + // |---------------------| <- conversion_temp + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + intptr_t start = (intptr_t)__ pc(); + + // First thing make an ic check to see if we should even be here + + { + Label L; + const Register temp_reg = G3_scratch; + Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); + __ verify_oop(O0); + __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); + __ cmp(temp_reg, G5_inline_cache_reg); + __ brx(Assembler::equal, true, Assembler::pt, L); + + __ jump_to(ic_miss, 0); + __ align(CodeEntryAlignment); + __ bind(L); + } + + int vep_offset = ((intptr_t)__ pc()) - start; + + // The instruction at the verified entry point must be 4 bytes or longer + // because it can be patched on the fly by make_non_entrant. The stack bang + // instruction fits that requirement. + + // Generate stack overflow check before creating frame + __ generate_stack_overflow_check(stack_size); + + assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, + "valid size for make_non_entrant"); + + // Generate a new frame for the wrapper. + __ save(SP, -stack_size, SP); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + VMRegPair zero; + const Register g0 = G0; // without this we get a compiler warning (why??) 
+ zero.set2(g0->as_VMReg()); + + int c_arg, j_arg; + + Register conversion_off = noreg; + + for (j_arg = first_arg_to_pass, c_arg = 0 ; + j_arg < total_args_passed ; j_arg++, c_arg++ ) { + + VMRegPair src = in_regs[j_arg]; + VMRegPair dst = out_regs[c_arg]; + +#ifdef ASSERT + if (src.first()->is_Register()) { + assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); + } else if (src.first()->is_FloatRegister()) { + assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)], "ack!"); + } + if (dst.first()->is_Register()) { + reg_destroyed[dst.first()->as_Register()->encoding()] = true; + } else if (dst.first()->is_FloatRegister()) { + freg_destroyed[dst.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)] = true; + } +#endif /* ASSERT */ + + switch (in_sig_bt[j_arg]) { + case T_ARRAY: + case T_OBJECT: + { + if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || + out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { + // need to unbox a one-slot value + Register in_reg = L0; + Register tmp = L2; + if ( src.first()->is_reg() ) { + in_reg = src.first()->as_Register(); + } else { + assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); + } + // If the final destination is an acceptable register + if ( dst.first()->is_reg() ) { + if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { + tmp = dst.first()->as_Register(); + } + } + + Label skipUnbox; + if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { + __ mov(G0, tmp->successor()); + } + __ mov(G0, tmp); + __ br_null(in_reg, true, Assembler::pn, skipUnbox); + + BasicType bt = out_sig_bt[c_arg]; + int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); + switch (bt) { + case T_BYTE: + __ ldub(in_reg, box_offset, tmp); break; + case T_SHORT: + __ lduh(in_reg, box_offset, tmp); break; + case T_INT: + __ ld(in_reg, box_offset, tmp); break; + case T_LONG: + __ ld_long(in_reg, box_offset, tmp); break; + default: ShouldNotReachHere(); + } + + __ bind(skipUnbox); + // If tmp wasn't final destination copy to final destination + if (tmp == L2) { + VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); + if (out_sig_bt[c_arg] == T_LONG) { + long_move(masm, tmp_as_VM, dst); + } else { + move32_64(masm, tmp_as_VM, out_regs[c_arg]); + } + } + if (out_sig_bt[c_arg] == T_LONG) { + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); + ++c_arg; // move over the T_VOID to keep the loop indices in sync + } + } else if (out_sig_bt[c_arg] == T_ADDRESS) { + Register s = + src.first()->is_reg() ? src.first()->as_Register() : L2; + Register d = + dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // We store the oop now so that the conversion pass can reach + // while in the inner frame. This will be the only store if + // the oop is NULL. 
+ if (s != L2) { + // src is register + if (d != L2) { + // dst is register + __ mov(s, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } else { + // src not a register + assert(Assembler::is_simm13(reg2offset(src.first()) + + STACK_BIAS), "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); + if (d == L2) { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } else if (out_sig_bt[c_arg] != T_VOID) { + // Convert the arg to NULL + if (dst.first()->is_reg()) { + __ mov(G0, dst.first()->as_Register()); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } + break; + case T_VOID: + break; + + case T_FLOAT: + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + move32_64(masm, src, dst); + } else { + if (dst.first()->is_reg()) { + // freg -> reg + int off = + STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + Register d = dst.first()->as_Register(); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + __ ld(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld(SP, conversion_off , d); + } + } else { + // freg -> mem + int off = STACK_BIAS + reg2offset(dst.first()); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + } + } + } + break; + + case T_DOUBLE: + assert( j_arg + 1 < total_args_passed && + in_sig_bt[j_arg + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + long_move(masm, src, dst); + } else { + Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // Destination could be an odd reg on 32bit in which case + // we can't load direct to the destination. 
+ + if (!d->is_even() && wordSize == 4) { + d = L2; + } + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, off); + __ ld_long(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld_long(SP, conversion_off, d); + } + if (d == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } + break; + + case T_LONG : + // 32bit can't do a split move of something like g1 -> O0, O1 + // so use a memory temp + if (src.is_single_phys_reg() && wordSize == 4) { + Register tmp = L2; + if (dst.first()->is_reg() && + (wordSize == 8 || dst.first()->as_Register()->is_even())) { + tmp = dst.first()->as_Register(); + } + + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stx(src.first()->as_Register(), SP, off); + __ ld_long(SP, off, tmp); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stx(src.first()->as_Register(), SP, conversion_off); + __ ld_long(SP, conversion_off, tmp); + } + + if (tmp == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } else { + long_move(masm, src, dst); + } + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + move32_64(masm, src, dst); + } + } + + + // If we have any strings we must store any register based arg to the stack + // This includes any still live xmm registers too. + + if (total_strings > 0 ) { + + // protect all the arg registers + __ save_frame(0); + __ mov(G2_thread, L7_thread_cache); + const Register L2_string_off = L2; + + // Get first string offset + __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); + + for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { + if (out_sig_bt[c_arg] == T_ADDRESS) { + + VMRegPair dst = out_regs[c_arg]; + const Register d = dst.first()->is_reg() ? + dst.first()->as_Register()->after_save() : noreg; + + // It's a string the oop and it was already copied to the out arg + // position + if (d != noreg) { + __ mov(d, O0); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); + } + Label skip; + + __ add_d(FP, L2_string_off, O1); + __ br_null(O0, false, Assembler::pn, skip); + + if (d != noreg) { + __ mov(O1, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); + } + + __ addi_d(L2_string_off, max_dtrace_string_size, L2_string_off); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), + relocInfo::runtime_call_type); + + __ bind(skip); + + } + + } + __ mov(L7_thread_cache, G2_thread); + __ restore(); + + } + + + // Ok now we are done. 
Need to place the nop that dtrace wants in order to + // patch in the trap + + int patch_offset = ((intptr_t)__ pc()) - start; + + __ nop(); + + + // Return + + __ restore(); + __ ret(); + + __ flush(); + nmethod *nm = nmethod::new_dtrace_nmethod( + method, masm->code(), vep_offset, patch_offset, frame_complete, + stack_slots / VMRegImpl::slots_per_word); + return nm; +} + +#endif // HAVE_DTRACE_H + +// this function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + return (callee_locals - callee_parameters) * Interpreter::stackElementWords; +} + +// "Top of Stack" slots that may be unused by the calling convention but must +// otherwise be preserved. +// On Intel these are not necessary and the value can be zero. +// On Sparc this describes the words reserved for storing a register window +// when an interrupt occurs. +uint SharedRuntime::out_preserve_stack_slots() { + return 0; +} + +//------------------------------generate_deopt_blob---------------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. +void SharedRuntime::generate_deopt_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + //CodeBuffer buffer ("deopt_blob", 4000, 2048); + CodeBuffer buffer ("deopt_blob", 8000, 2048); // FIXME for debug + MacroAssembler* masm = new MacroAssembler( & buffer); + int frame_size_in_words; + OopMap* map = NULL; + // Account for the extra args we place on the stack + // by the time we call fetch_unroll_info + const int additional_words = 2; // deopt kind, thread + + OopMapSet *oop_maps = new OopMapSet(); + RegisterSaver reg_save(false); + + address start = __ pc(); + Label cont; + // we use S3 for DeOpt reason register + Register reason = S3; + // use S6 for thread register + Register thread = TREG; + // use S7 for fetch_unroll_info returned UnrollBlock + Register unroll = S7; + // Prolog for non exception case! + + // We have been called from the deopt handler of the deoptee. + // + // deoptee: + // ... + // call X + // ... + // deopt_handler: call_deopt_stub + // cur. return pc --> ... + // + // So currently RA points behind the call in the deopt handler. + // We adjust it such that it points to the start of the deopt handler. + // The return_pc has been stored in the frame of the deoptee and + // will replace the address of the deopt_handler in the call + // to Deoptimization::fetch_unroll_info below. + + // HandlerImpl::size_deopt_handler() + __ addi_d(RA, RA, - NativeFarCall::instruction_size); + // Save everything in sight. + map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + // Normal deoptimization + __ li(reason, Deoptimization::Unpack_deopt); + __ b(cont); + + int reexecute_offset = __ pc() - start; + + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + __ li(reason, Deoptimization::Unpack_reexecute); + __ b(cont); + + int exception_offset = __ pc() - start; + // Prolog for exception case + + // all registers are dead at this entry point, except for V0 and + // V1 which contain the exception oop and exception pc + // respectively. 
Set them in TLS and fall thru to the + // unpack_with_exception_in_tls entry point. + + __ get_thread(thread); + __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + int exception_in_tls_offset = __ pc() - start; + // new implementation because exception oop is now passed in JavaThread + + // Prolog for exception case + // All registers must be preserved because they might be used by LinearScan + // Exceptiop oop and throwing PC are passed in JavaThread + // tos: stack at point of call to method that threw the exception (i.e. only + // args are on the stack, no return address) + + // Return address will be patched later with the throwing pc. The correct value is not + // available now because loading it from memory would destroy registers. + // Save everything in sight. + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + + // Now it is safe to overwrite any register + // store the correct deoptimization type + __ li(reason, Deoptimization::Unpack_exception); + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread + __ get_thread(thread); + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + + +#ifdef ASSERT + // verify that there is really an exception oop in JavaThread + __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); + __ verify_oop(AT); + // verify that there is no pending exception + Label no_pending_exception; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, no_pending_exception); + __ stop("must not have pending exception here"); + __ bind(no_pending_exception); +#endif + __ bind(cont); + // Compiled code leaves the floating point stack dirty, empty it. + __ empty_FPU_stack(); + + + // Call C code. Need thread and this frame, but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ move(A0, thread); + __ addi_d(SP, SP, -additional_words * wordSize); + + Label retaddr; + __ set_last_Java_frame(NOREG, NOREG, retaddr); + + // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. Call should capture return values. + + // TODO: confirm reloc + __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); + __ bind(retaddr); + oop_maps->add_gc_map(__ pc() - start, map); + __ addi_d(SP, SP, additional_words * wordSize); + __ get_thread(thread); + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + __ move(unroll, V0); + + + // Move the unpack kind to a safe place in the UnrollBlock because + // we are very short of registers + + Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); + __ st_w(reason, unpack_kind); + // save the unpack_kind value + // Retrieve the possible live values (return values) + // All callee save registers representing jvm state + // are now in the vframeArray. + + Label noException; + __ li(AT, Deoptimization::Unpack_exception); + __ bne(AT, reason, noException);// Was exception pending? 
+ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); + + __ verify_oop(V0); + + // Overwrite the result registers with the exception results. + __ st_ptr(V0, SP, reg_save.v0_offset()); + __ st_ptr(V1, SP, reg_save.v1_offset()); + + __ bind(noException); + + + // Stack is back to only having register save data on the stack. + // Now restore the result registers. Everything else is either dead or captured + // in the vframeArray. + + reg_save.restore_result_registers(masm); + // All of the register save area has been popped of the stack. Only the + // return address remains. + // Pop all the frames we must move/replace. + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). + // + // Note: by leaving the return address of self-frame on the stack + // and using the size of frame 2 to adjust the stack + // when we are done the return to frame 3 will still be on the stack. + + // register for the sender's sp + Register sender_sp = Rsender; + // register for frame pcs + Register pcs = T0; + // register for frame sizes + Register sizes = T1; + // register for frame count + Register count = T3; + + // Pop deoptimized frame + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ add_d(SP, SP, AT); + // sp should be pointing at the return address to the caller (3) + + // Load array of frame pcs into pcs + __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + __ addi_d(SP, SP, wordSize); // trash the old pc + // Load array of frame sizes into T6 + __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + + + + // Load count of frams into T3 + __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + // Pick up the initial fp we should save + __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + __ move(sender_sp, SP); + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ sub_d(SP, SP, AT); + + Label loop; + __ bind(loop); + __ ld_d(T2, sizes, 0); // Load frame size + __ ld_ptr(AT, pcs, 0); // save return address + __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ sub_d(SP, SP, T2); // Prolog! 
+ // This value is corrected by layout_activation_impl + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ addi_d(count, count, -1); // decrement counter + __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ bne(count, R0, loop); + __ ld_d(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); + // Re-push self-frame + __ push2(AT, FP); + __ move(FP, SP); + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); + + // Restore frame locals after moving the frame + __ st_d(V0, SP, reg_save.v0_offset()); + __ st_d(V1, SP, reg_save.v1_offset()); + __ fst_d(F0, SP, reg_save.fpr0_offset()); + __ fst_d(F1, SP, reg_save.fpr1_offset()); + + // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. + __ move(A1, reason); // exec_mode + __ get_thread(thread); + __ move(A0, thread); // thread + __ addi_d(SP, SP, (-additional_words) *wordSize); + + // set last_Java_sp, last_Java_fp + Label L; + address the_pc = __ pc(); + __ bind(L); + __ set_last_Java_frame(NOREG, FP, L); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); + // Revert SP alignment after call since we're going to do some SP relative addressing below + __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // Set an oopmap for the call site + oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); + + __ push(V0); + + __ get_thread(thread); + __ reset_last_Java_frame(true); + + // Collect return values + __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); + __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); + // Pop float stack and store in local + __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); + __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); + + //FIXME, + // Clear floating point stack before returning to interpreter + __ empty_FPU_stack(); + //FIXME, we should consider about float and double + // Push a float or double return value if necessary. + __ leave(); + + // Jump to interpreter + __ jr(RA); + + masm->flush(); + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +} + +#ifdef COMPILER2 + +//------------------------------generate_uncommon_trap_blob-------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. 
+void SharedRuntime::generate_uncommon_trap_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); + MacroAssembler* masm = new MacroAssembler(&buffer); + + enum frame_layout { + fp_off, fp_off2, + return_off, return_off2, + framesize + }; + assert(framesize % 4 == 0, "sp not 16-byte aligned"); + address start = __ pc(); + + // Push self-frame. + __ addi_d(SP, SP, -framesize * BytesPerInt); + + __ st_d(RA, SP, return_off * BytesPerInt); + __ st_d(FP, SP, fp_off * BytesPerInt); + + __ addi_d(FP, SP, fp_off * BytesPerInt); + + // Clear the floating point exception stack + __ empty_FPU_stack(); + + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // set last_Java_sp + Label retaddr; + __ set_last_Java_frame(NOREG, FP, retaddr); + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // capture callee-saved registers as well as return values. + __ move(A0, thread); + // argument already in T0 + __ move(A1, T0); + __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); + __ bind(retaddr); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap( framesize, 0 ); + + oop_maps->add_gc_map(__ pc() - start, map); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + Register unroll = S7; + __ move(unroll, V0); + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: possible-i2c-adapter-frame + // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an + // and c2i here) + + __ addi_d(SP, SP, framesize * BytesPerInt); + + // Pop deoptimized frame + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ add_d(SP, SP, AT); + + // register for frame pcs + Register pcs = T8; + // register for frame sizes + Register sizes = T4; + // register for frame count + Register count = T3; + // register for the sender's sp + Register sender_sp = T1; + + // sp should be pointing at the return address to the caller (4) + // Load array of frame pcs + __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + + // Load array of frame sizes + __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + + // Pick up the initial fp we should save + __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + + __ move(sender_sp, SP); + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ sub_d(SP, SP, AT); + // Push interpreter frames in a loop + Label loop; + __ bind(loop); + __ ld_d(T2, sizes, 0); // Load frame size + __ ld_d(AT, pcs, 0); // save return address + __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ sub_d(SP, SP, T2); // Prolog! 
+ // This value is corrected by layout_activation_impl + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ addi_d(count, count, -1); // decrement counter + __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ bne(count, R0, loop); + + __ ld_d(RA, pcs, 0); + + // Re-push self-frame + // save old & set new FP + // save final return address + __ enter(); + + // Use FP because the frames look interpreted now + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + Label L; + address the_pc = __ pc(); + __ bind(L); + __ set_last_Java_frame(NOREG, FP, L); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + __ move(A0, thread); + __ li(A1, Deoptimization::Unpack_uncommon_trap); + __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); + // Set an oopmap for the call site + oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); + + __ reset_last_Java_frame(true); + + // Pop self-frame. + __ leave(); // Epilog! + + // Jump to interpreter + __ jr(RA); + // ------------- + // make sure all code is generated + masm->flush(); + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); +} + +#endif // COMPILER2 + +//------------------------------generate_handler_blob------------------- +// +// Generate a special Compile2Runtime blob that saves all registers, and sets +// up an OopMap and calls safepoint code to stop the compiled code for +// a safepoint. +// +// This blob is jumped to (via a breakpoint and the signal handler) from a +// safepoint in compiled code. + +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { + + // Account for thread arg in our frame + const int additional_words = 0; + int frame_size_in_words; + + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // allocate space for the code + // setup code generation tools + CodeBuffer buffer ("handler_blob", 2048, 512); + MacroAssembler* masm = new MacroAssembler( &buffer); + + const Register thread = TREG; + address start = __ pc(); + bool cause_return = (poll_type == POLL_AT_RETURN); + RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); + + // If cause_return is true we are at a poll_return and there is + // the return address in RA to the caller on the nmethod + // that is safepoint. We can leave this return in RA and + // effectively complete the return and safepoint in the caller. + // Otherwise we load exception pc to RA. 
+ __ push(thread); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + if(!cause_return) { + __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset())); + } + + __ pop(thread); + map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work outselvs. + + __ move(A0, thread); + Label retaddr; + __ set_last_Java_frame(NOREG, NOREG, retaddr); + + // Do the call + // TODO: confirm reloc + __ call(call_ptr, relocInfo::runtime_call_type); + __ bind(retaddr); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + oop_maps->add_gc_map(__ pc() - start, map); + + Label noException; + + // Clear last_Java_sp again + __ reset_last_Java_frame(false); + + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, noException); + + // Exception pending + + reg_save.restore_live_registers(masm); + //forward_exception_entry need return address on the stack + __ push(RA); + // TODO: confirm reloc + __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + + // No exception case + __ bind(noException); + // Normal exit, register restoring and exit + reg_save.restore_live_registers(masm); + __ jr(RA); + + masm->flush(); + // Fill-out other meta info + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); +} + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss +// +// Generate a stub that calls into vm to find out the proper destination +// of a java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + + //CodeBuffer buffer(name, 1000, 512); + //FIXME. code_size + CodeBuffer buffer(name, 2000, 2048); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int frame_size_words; + RegisterSaver reg_save(false /* save_vectors */); + //we put the thread in A0 + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + address start = __ pc(); + map = reg_save.save_live_registers(masm, 0, &frame_size_words); + + + int frame_complete = __ offset(); + const Register thread = T8; + __ get_thread(thread); + + __ move(A0, thread); + Label retaddr; + __ set_last_Java_frame(noreg, FP, retaddr); + // align the stack before invoke native + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + // TODO: confirm reloc + __ call(destination, relocInfo::runtime_call_type); + __ bind(retaddr); + + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. 
+ oop_maps->add_gc_map(__ pc() - start, map); + // V0 contains the address we are going to jump to assuming no exception got installed + __ get_thread(thread); + __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // clear last_Java_sp + __ reset_last_Java_frame(true); + // check for pending exceptions + Label pending; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, pending); + // get the returned Method* + __ get_vm_result_2(Rmethod, thread); + __ st_ptr(Rmethod, SP, reg_save.s3_offset()); + __ st_ptr(V0, SP, reg_save.t5_offset()); + reg_save.restore_live_registers(masm); + + // We are back the the original state on entry and ready to go the callee method. + __ jr(T5); + // Pending exception after the safepoint + + __ bind(pending); + + reg_save.restore_live_registers(masm); + + // exception pending => remove activation and forward to exception handler + //forward_exception_entry need return address on the stack + __ push(RA); + __ get_thread(thread); + __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); + __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + // + // make sure all code is generated + masm->flush(); + RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); + return tmp; +} + +extern "C" int SpinPause() {return 0;} diff --git a/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp new file mode 100644 index 00000000000..361b775144d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp @@ -0,0 +1,3445 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/top.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) + +//#ifdef PRODUCT +//#define BLOCK_COMMENT(str) /* nothing */ +//#else +//#define BLOCK_COMMENT(str) __ block_comment(str) +//#endif + +//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions + +// Stub Code definitions + +static address handle_unsafe_access() { + JavaThread* thread = JavaThread::current(); + address pc = thread->saved_exception_pc(); + // pc is the instruction which we must emulate + // doing a no-op is fine: return garbage from the load + // therefore, compute npc + address npc = (address)((unsigned long)pc + sizeof(unsigned int)); + + // request an async exception + thread->set_pending_unsafe_access_error(); + + // return address of next instruction to execute + return npc; +} + +class StubGenerator: public StubCodeGenerator { + private: + + // This fig is not LA ABI. It is call Java from C ABI. + // Call stubs are used to call Java from C + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S1) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp + // 3 [ result ] <--- a1 + // 4 [ result_type ] <--- a2 + // 5 [ method ] <--- a3 + // 6 [ entry_point ] <--- a4 + // 7 [ parameters ] <--- a5 + // 8 [ parameter_size ] <--- a6 + // 9 [ thread ] <--- a7 + + // + // LA ABI does not save paras in sp. + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + //-13 [ thread ] + //-12 [ result_type ] <--- a2 + //-11 [ result ] <--- a1 + //-10 [ ] + // -9 [ ptr. to call wrapper ] <--- a0 + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S1) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ] <--- old sp + // + // Find a right place in the call_stub for S8. + // S8 will point to the starting point of Interpreter::dispatch_table(itos). + // It should be saved/restored before/after Java calls. 
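As a reading aid for the enum that follows (with wordSize == 8, all offsets relative to FP):

    thread_off      == -13  ->  saved A7 (thread)       at FP - 104
    result_type_off == -12  ->  saved A2 (result_type)  at FP -  96
    result_off      == -11  ->  saved A1 (result)       at FP -  88
    S8_off          == -14  ->  saved S8                at FP - 112

total_off == thread_off - 1 == -14, so the prologue's addi_d(SP, SP, total_off * wordSize) drops SP by 14 words (112 bytes), matching the 14 negative slots in the table from BCP_off == -1 down to S8_off == -14.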
+ // + enum call_stub_layout { + RA_off = 1, + FP_off = 0, + BCP_off = -1, + LVP_off = -2, + TSR_off = -3, + S1_off = -4, + S3_off = -5, + S4_off = -6, + S5_off = -7, + S6_off = -8, + call_wrapper_off = -9, + result_off = -11, + result_type_off = -12, + thread_off = -13, + total_off = thread_off - 1, + S8_off = -14, + }; + + address generate_call_stub(address& return_address) { + assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + + // same as in generate_catch_exception()! + + // stub code + // save ra and fp + __ enter(); + // I think 14 is the max gap between argument and callee saved register + __ addi_d(SP, SP, total_off * wordSize); + __ st_d(BCP, FP, BCP_off * wordSize); + __ st_d(LVP, FP, LVP_off * wordSize); + __ st_d(TSR, FP, TSR_off * wordSize); + __ st_d(S1, FP, S1_off * wordSize); + __ st_d(S3, FP, S3_off * wordSize); + __ st_d(S4, FP, S4_off * wordSize); + __ st_d(S5, FP, S5_off * wordSize); + __ st_d(S6, FP, S6_off * wordSize); + __ st_d(A0, FP, call_wrapper_off * wordSize); + __ st_d(A1, FP, result_off * wordSize); + __ st_d(A2, FP, result_type_off * wordSize); + __ st_d(A7, FP, thread_off * wordSize); + __ st_d(S8, FP, S8_off * wordSize); + + __ li(S8, (long)Interpreter::dispatch_table(itos)); + +#ifdef OPT_THREAD + __ move(TREG, A7); +#endif + //add for compressedoops + __ reinit_heapbase(); + +#ifdef ASSERT + // make sure we have no pending exceptions + { + Label L; + __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + /* FIXME: I do not know how to realize stop in LA, do it in the future */ + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ bind(L); + } +#endif + + // pass parameters if any + // A5: parameter + // A6: parameter_size + // T0: parameter_size_tmp(--) + // T2: offset(++) + // T3: tmp + Label parameters_done; + // judge if the parameter_size equals 0 + __ beq(A6, R0, parameters_done); + __ slli_d(AT, A6, Interpreter::logStackElementSize); + __ sub_d(SP, SP, AT); + __ li(AT, -StackAlignmentInBytes); + __ andr(SP, SP, AT); + // Copy Java parameters in reverse order (receiver last) + // Note that the argument order is inverted in the process + Label loop; + __ move(T0, A6); + __ move(T2, R0); + __ bind(loop); + + // get parameter + __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); + __ ld_d(AT, T3, -wordSize); + __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); + __ st_d(AT, T3, Interpreter::expr_offset_in_bytes(0)); + __ addi_d(T2, T2, 1); + __ addi_d(T0, T0, -1); + __ bne(T0, R0, loop); + // advance to next parameter + + // call Java function + __ bind(parameters_done); + + // receiver in V0, methodOop in Rmethod + + __ move(Rmethod, A3); + __ move(Rsender, SP); //set sender sp + __ jalr(A4); + return_address = __ pc(); + + Label common_return; + __ bind(common_return); + + // store result depending on type + // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + __ ld_d(T0, FP, result_off * wordSize); // result --> T0 + Label is_long, is_float, is_double, exit; + __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 + __ addi_d(T3, T2, (-1) * T_LONG); + __ beq(T3, R0, is_long); + __ addi_d(T3, T2, (-1) * T_FLOAT); + __ beq(T3, R0, is_float); + __ addi_d(T3, T2, (-1) * T_DOUBLE); + __ beq(T3, R0, is_double); + + // handle T_INT case + __ st_d(V0, T0, 0 * wordSize); + __ bind(exit); + + // restore + __ ld_d(BCP, FP, BCP_off * wordSize); + __ ld_d(LVP, 
FP, LVP_off * wordSize); + __ ld_d(S8, FP, S8_off * wordSize); + __ ld_d(TSR, FP, TSR_off * wordSize); + + __ ld_d(S1, FP, S1_off * wordSize); + __ ld_d(S3, FP, S3_off * wordSize); + __ ld_d(S4, FP, S4_off * wordSize); + __ ld_d(S5, FP, S5_off * wordSize); + __ ld_d(S6, FP, S6_off * wordSize); + + __ leave(); + + // return + __ jr(RA); + + // handle return types different from T_INT + __ bind(is_long); + __ st_d(V0, T0, 0 * wordSize); + __ b(exit); + + __ bind(is_float); + __ fst_s(FV0, T0, 0 * wordSize); + __ b(exit); + + __ bind(is_double); + __ fst_d(FV0, T0, 0 * wordSize); + __ b(exit); + StubRoutines::la::set_call_stub_compiled_return(__ pc()); + __ b(common_return); + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + // Note: Usually the parameters are removed by the callee. In case + // of an exception crossing an activation frame boundary, that is + // not the case if the callee is compiled code => need to setup the + // sp. + // + // V0: exception oop + + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + + Register thread = TREG; + + // get thread directly +#ifndef OPT_THREAD + __ ld_d(thread, FP, thread_off * wordSize); +#endif + +#ifdef ASSERT + // verify that threads correspond + { Label L; + __ get_thread(T8); + __ beq(T8, thread, L); + __ stop("StubRoutines::catch_exception: threads must correspond"); + __ bind(L); + } +#endif + // set pending exception + __ verify_oop(V0); + __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ li(AT, (long)__FILE__); + __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); + __ li(AT, (long)__LINE__); + __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); + __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + // Contract with Java-level exception handlers: + // V0: exception + // V1: throwing pc + // + // NOTE: At entry of this stub, exception-pc must be on stack !! + + address generate_forward_exception() { + StubCodeMark mark(this, "StubRoutines", "forward exception"); + //Register thread = TREG; + Register thread = TREG; + address start = __ pc(); + + // Upon entry, the sp points to the return address returning into + // Java (interpreted or compiled) code; i.e., the return address + // throwing pc. + // + // Arguments pushed before the runtime call are still on the stack + // but the exception handler will reset the stack pointer -> + // ignore them. A potential result in registers can be ignored as + // well. 
+ +#ifndef OPT_THREAD + __ get_thread(thread); +#endif +#ifdef ASSERT + // make sure this code is only executed if there is a pending exception + { + Label L; + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into T4 + __ ld_d(A1, SP, 0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T4, V0); + __ pop(V1); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); + +#ifdef ASSERT + // make sure exception is set + { + Label L; + __ bne(V0, R0, L); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + + // continue at exception handler (return address removed) + // V0: exception + // T4: exception handler + // V1: throwing pc + __ verify_oop(V0); + __ jr(T4); + return start; + } + + // The following routine generates a subroutine to throw an + // asynchronous UnknownError when an unsafe access gets a fault that + // could not be reasonably prevented by the programmer. (Example: + // SIGBUS/OBJERR.) + address generate_handler_for_unsafe_access() { + StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); + address start = __ pc(); + __ push(V0); + __ pushad_except_v0(); // push registers + __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); + __ popad_except_v0(); + __ move(RA, V0); + __ pop(V0); + __ jr(RA); + return start; + } + + // Non-destructive plausibility checks for oops + // + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); + __ reinit_heapbase(); + __ verify_oop_subroutine(); + address end = __ pc(); + return start; + } + + // + // Generate stub for array fill. If "aligned" is true, the + // "to" address is assumed to be heapword aligned. 
+ // + // Arguments for generated stub: + // to: A0 + // value: A1 + // count: A2 treated as signed + // + address generate_fill(BasicType t, bool aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + const Register to = A0; // source array address + const Register value = A1; // value + const Register count = A2; // elements count + + const Register end = T5; // source array address end + const Register tmp = T8; // temp register + + Label L_fill_elements; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 0; + __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element + __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit + __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit + __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit + __ bnez(AT, L_fill_elements); + break; + case T_SHORT: + shift = 1; + __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element + __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit + __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit + __ bnez(AT, L_fill_elements); + break; + case T_INT: + shift = 2; + __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element + __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit + __ bnez(AT, L_fill_elements); + break; + default: ShouldNotReachHere(); + } + + switch (t) { + case T_BYTE: + __ add_d(end, to, count); + break; + case T_SHORT: + case T_INT: + __ alsl_d(end, count, to, shift-1); + break; + default: ShouldNotReachHere(); + } + if (!aligned) { + __ st_d(value, to, 0); + __ bstrins_d(to, R0, 2, 0); + __ addi_d(to, to, 8); + } + __ st_d(value, end, -8); + __ bstrins_d(end, R0, 2, 0); + + // + // Fill large chunks + // + Label L_loop_begin, L_not_64bytes_fill, L_loop_end; + __ addi_d(AT, to, 64); + __ blt(end, AT, L_not_64bytes_fill); + __ addi_d(to, to, 64); + __ bind(L_loop_begin); + __ st_d(value, to, -8); + __ st_d(value, to, -16); + __ st_d(value, to, -24); + __ st_d(value, to, -32); + __ st_d(value, to, -40); + __ st_d(value, to, -48); + __ st_d(value, to, -56); + __ st_d(value, to, -64); + __ addi_d(to, to, 64); + __ bge(end, to, L_loop_begin); + __ addi_d(to, to, -64); + __ beq(to, end, L_loop_end); + + __ bind(L_not_64bytes_fill); + // There are 0 - 7 words + __ pcaddi(AT, 4); + __ sub_d(tmp, end, to); + __ alsl_d(AT, tmp, AT, 1); + __ jr(AT); + + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ st_d(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 4: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + __ st_d(value, to, 32); + __ jr(RA); + __ nop(); + __ nop(); + + // 6: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + __ st_d(value, to, 32); + __ st_d(value, to, 40); + __ jr(RA); + __ nop(); + + // 7: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + 
__ st_d(value, to, 32); + __ st_d(value, to, 40); + __ st_d(value, to, 48); + + __ bind(L_loop_end); + __ jr(RA); + + // Short arrays (<= 8 bytes) + __ bind(L_fill_elements); + __ pcaddi(AT, 4); + __ slli_d(tmp, count, 4 + shift); + __ add_d(AT, AT, tmp); + __ jr(AT); + + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ st_b(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + + // 2: + __ st_h(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + + // 3: + __ st_h(value, to, 0); + __ st_b(value, to, 2); + __ jr(RA); + __ nop(); + + // 4: + __ st_w(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + + // 5: + __ st_w(value, to, 0); + __ st_b(value, to, 4); + __ jr(RA); + __ nop(); + + // 6: + __ st_w(value, to, 0); + __ st_h(value, to, 4); + __ jr(RA); + __ nop(); + + // 7: + __ st_w(value, to, 0); + __ st_w(value, to, 3); + __ jr(RA); + __ nop(); + + // 8: + __ st_d(value, to, 0); + __ jr(RA); + return start; + } + + // + // Generate overlap test for array copy stubs + // + // Input: + // A0 - source array address + // A1 - destination array address + // A2 - element count + // + // Temp: + // AT - destination array address - source array address + // T4 - element count * element size + // + void array_overlap_test(address no_overlap_target, int log2_elem_size) { + __ slli_d(T4, A2, log2_elem_size); + __ sub_d(AT, A1, A0); + __ bgeu(AT, T4, no_overlap_target); + } + + // Generate code for an array write pre barrier + // + // Input: + // addr - starting address + // count - element count + // + // Temp: + // AT - used to swap addr and count + // + void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { + BarrierSet* bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + // With G1, don't generate the call if we statically know that the target in uninitialized + if (!dest_uninitialized) { + if (count == A0) { + if (addr == A1) { + // exactly backwards!! + __ move(AT, A0); + __ move(A0, A1); + __ move(A1, AT); + } else { + __ move(A1, count); + __ move(A0, addr); + } + } else { + __ move(A0, addr); + __ move(A1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + case BarrierSet::ModRef: + break; + default: + ShouldNotReachHere(); + } + } + + // + // Generate code for an array write post barrier + // + // Input: + // start - register containing starting address of destination array + // count - elements count + // scratch - scratch register + // + // Temp: + // AT - used to swap addr and count + // + // The input registers are overwritten. + // + void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) { + assert_different_registers(start, count, scratch, AT); + BarrierSet* bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + if (count == A0) { + if (start == A1) { + // exactly backwards!! 
+ __ move(AT, A0); + __ move(A0, A1); + __ move(A1, AT); + } else { + __ move(A1, count); + __ move(A0, start); + } + } else { + __ move(A0, start); + __ move(A1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label L_loop; + const Register end = count; + + if (UseConcMarkSweepGC) { + __ membar(__ StoreStore); + } + + int64_t disp = (int64_t) ct->byte_map_base; + __ li(scratch, disp); + + __ lea(end, Address(start, count, TIMES_OOP, 0)); // end == start + count * oop_size + __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive + __ shr(start, CardTableModRefBS::card_shift); + __ shr(end, CardTableModRefBS::card_shift); + __ sub_d(end, end, start); // end --> cards count + + __ add_d(start, start, scratch); + + __ bind(L_loop); + __ stx_b(R0, start, count); + __ addi_d(count, count, -1); + __ bge(count, R0, L_loop); + } + break; + default: + ShouldNotReachHere(); + } + } + + // disjoint large copy + void generate_disjoint_large_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label loop, le32, le16, le8, lt8; + + __ bind(entry); + __ add_d(A3, A1, A2); + __ add_d(A2, A0, A2); + __ ld_d(A6, A0, 0); + __ ld_d(A7, A2, -8); + + __ andi(T1, A0, 7); + __ sub_d(T0, R0, T1); + __ addi_d(T0, T0, 8); + + __ add_d(A0, A0, T0); + __ add_d(A5, A1, T0); + + __ addi_d(A4, A2, -64); + __ bgeu(A0, A4, le32); + + __ bind(loop); + __ ld_d(T0, A0, 0); + __ ld_d(T1, A0, 8); + __ ld_d(T2, A0, 16); + __ ld_d(T3, A0, 24); + __ ld_d(T4, A0, 32); + __ ld_d(T5, A0, 40); + __ ld_d(T6, A0, 48); + __ ld_d(T7, A0, 56); + __ addi_d(A0, A0, 64); + __ st_d(T0, A5, 0); + __ st_d(T1, A5, 8); + __ st_d(T2, A5, 16); + __ st_d(T3, A5, 24); + __ st_d(T4, A5, 32); + __ st_d(T5, A5, 40); + __ st_d(T6, A5, 48); + __ st_d(T7, A5, 56); + __ addi_d(A5, A5, 64); + __ bltu(A0, A4, loop); + + __ bind(le32); + __ addi_d(A4, A2, -32); + __ bgeu(A0, A4, le16); + __ ld_d(T0, A0, 0); + __ ld_d(T1, A0, 8); + __ ld_d(T2, A0, 16); + __ ld_d(T3, A0, 24); + __ addi_d(A0, A0, 32); + __ st_d(T0, A5, 0); + __ st_d(T1, A5, 8); + __ st_d(T2, A5, 16); + __ st_d(T3, A5, 24); + __ addi_d(A5, A5, 32); + + __ bind(le16); + __ addi_d(A4, A2, -16); + __ bgeu(A0, A4, le8); + __ ld_d(T0, A0, 0); + __ ld_d(T1, A0, 8); + __ addi_d(A0, A0, 16); + __ st_d(T0, A5, 0); + __ st_d(T1, A5, 8); + __ addi_d(A5, A5, 16); + + __ bind(le8); + __ addi_d(A4, A2, -8); + __ bgeu(A0, A4, lt8); + __ ld_d(T0, A0, 0); + __ st_d(T0, A5, 0); + + __ bind(lt8); + __ st_d(A6, A1, 0); + __ st_d(A7, A3, -8); + __ jr(RA); + } + + // conjoint large copy + void generate_conjoint_large_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label loop, le32, le16, le8, lt8; + + __ bind(entry); + __ add_d(A3, A1, A2); + __ add_d(A2, A0, A2); + __ ld_d(A6, A0, 0); + __ ld_d(A7, A2, -8); + + __ andi(T1, A0, 7); + __ sub_d(A2, A2, T1); + __ sub_d(A5, A3, T1); + + __ addi_d(A4, A0, 64); + __ bgeu(A4, A2, le32); + + __ bind(loop); + __ ld_d(T0, A2, -8); + __ ld_d(T1, A2, -16); + __ ld_d(T2, A2, -24); + __ ld_d(T3, A2, -32); + __ ld_d(T4, A2, -40); + __ ld_d(T5, A2, -48); + __ ld_d(T6, A2, -56); + __ ld_d(T7, A2, -64); + __ addi_d(A2, A2, -64); + __ st_d(T0, A5, -8); + __ 
st_d(T1, A5, -16); + __ st_d(T2, A5, -24); + __ st_d(T3, A5, -32); + __ st_d(T4, A5, -40); + __ st_d(T5, A5, -48); + __ st_d(T6, A5, -56); + __ st_d(T7, A5, -64); + __ addi_d(A5, A5, -64); + __ bltu(A4, A2, loop); + + __ bind(le32); + __ addi_d(A4, A0, 32); + __ bgeu(A4, A2, le16); + __ ld_d(T0, A2, -8); + __ ld_d(T1, A2, -16); + __ ld_d(T2, A2, -24); + __ ld_d(T3, A2, -32); + __ addi_d(A2, A2, -32); + __ st_d(T0, A5, -8); + __ st_d(T1, A5, -16); + __ st_d(T2, A5, -24); + __ st_d(T3, A5, -32); + __ addi_d(A5, A5, -32); + + __ bind(le16); + __ addi_d(A4, A0, 16); + __ bgeu(A4, A2, le8); + __ ld_d(T0, A2, -8); + __ ld_d(T1, A2, -16); + __ addi_d(A2, A2, -16); + __ st_d(T0, A5, -8); + __ st_d(T1, A5, -16); + __ addi_d(A5, A5, -16); + + __ bind(le8); + __ addi_d(A4, A0, 8); + __ bgeu(A4, A2, lt8); + __ ld_d(T0, A2, -8); + __ st_d(T0, A5, -8); + + __ bind(lt8); + __ st_d(A6, A1, 0); + __ st_d(A7, A3, -8); + __ jr(RA); + } + + // Byte small copy: less than 9 elements. + void generate_byte_small_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_b(AT, A0, 0); + __ st_b(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_h(AT, A0, 0); + __ st_h(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_h(AT, A0, 0); + __ ld_b(A2, A0, 2); + __ st_h(AT, A1, 0); + __ st_b(A2, A1, 2); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 4: + __ ld_w(AT, A0, 0); + __ st_w(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ ld_w(AT, A0, 0); + __ ld_b(A2, A0, 4); + __ st_w(AT, A1, 0); + __ st_b(A2, A1, 4); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 6: + __ ld_w(AT, A0, 0); + __ ld_h(A2, A0, 4); + __ st_w(AT, A1, 0); + __ st_h(A2, A1, 4); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 7: + __ ld_w(AT, A0, 0); + __ ld_w(A2, A0, 3); + __ st_w(AT, A1, 0); + __ st_w(A2, A1, 3); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 8: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ jr(RA); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_byte_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_byte_copy(). 
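+ // Dispatch (in C, approximately; a sketch of the control flow only):
+ //   if (count < 9) goto small;  // per-count move table
+ //   goto large;                 // shared bulk-copy loop, count is already in bytes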
+ // + address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, + const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + __ b(large); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, + const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); + + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + __ b(large); + + return start; + } + + // Short small copy: less than 9 elements. + void generate_short_small_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_h(AT, A0, 0); + __ st_h(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_w(AT, A0, 0); + __ st_w(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_w(AT, A0, 0); + __ ld_h(A2, A0, 4); + __ st_w(AT, A1, 0); + __ st_h(A2, A1, 4); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 4: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ ld_d(AT, A0, 0); + __ ld_h(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_h(A2, A1, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 6: + __ ld_d(AT, A0, 0); + __ ld_w(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_w(A2, A1, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 7: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 6); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 6); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 8: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ jr(RA); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_short_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_short_copy(). 
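+ // Dispatch (in C, approximately; a sketch of the control flow only):
+ //   if (count < 9) goto small;  // per-count move table
+ //   count <<= 1;                // elements -> bytes
+ //   goto large;                 // shared bulk-copy loop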
+ // + address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, + const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + __ slli_d(A2, A2, 1); + __ b(large); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we + // let the hardware handle it. The two or four words within dwords + // or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, + const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); + + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + __ slli_d(A2, A2, 1); + __ b(large); + + return start; + } + + // Short small copy: less than 7 elements. + void generate_int_small_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_w(AT, A0, 0); + __ st_w(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_d(AT, A0, 0); + __ ld_w(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_w(A2, A1, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 4: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ ld_w(A3, A0, 16); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ st_w(A3, A1, 16); + __ jr(RA); + __ nop(); + + // 6: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ ld_d(A3, A0, 16); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ st_d(A3, A1, 16); + __ jr(RA); + } + + // Generate maybe oop copy + void gen_maybe_oop_copy(bool is_oop, Label &small, Label &large, + const char *name, int small_limit, int log2_elem_size, + bool dest_uninitialized = false) { + Label post, _large; + + if (is_oop) { + __ addi_d(SP, SP, -4 * wordSize); + __ st_d(A2, SP, 3 * wordSize); + __ st_d(A1, SP, 2 * wordSize); + __ st_d(A0, SP, 1 * wordSize); + __ st_d(RA, SP, 0 * wordSize); + + gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); + + __ ld_d(A2, SP, 3 * wordSize); + __ ld_d(A1, SP, 2 * wordSize); + __ ld_d(A0, SP, 1 * wordSize); + } + + __ sltui(T0, A2, small_limit); + if (is_oop) { + __ beqz(T0, _large); + __ bl(small); + __ b(post); + } else { + __ bnez(T0, small); + } + + __ bind(_large); + __ slli_d(A2, A2, log2_elem_size); + + if (is_oop) { + __ bl(large); + } else { + __ b(large); + } + + if (is_oop) { + __ bind(post); + __ ld_d(A2, SP, 3 * wordSize); + __ ld_d(A1, SP, 2 * wordSize); + + gen_write_ref_array_post_barrier(A1, A2, T1); + + __ ld_d(RA, SP, 0 
* wordSize); + __ addi_d(SP, SP, 4 * wordSize); + __ jr(RA); + } + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, const char *name, + bool dest_uninitialized = false) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + gen_maybe_oop_copy(is_oop, small, large, name, 7, 2, dest_uninitialized); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, + Label &small, Label &large, const char *name, + bool dest_uninitialized = false) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + if (is_oop) { + array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); + } else { + array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); + } + + gen_maybe_oop_copy(is_oop, small, large, name, 7, 2, dest_uninitialized); + + return start; + } + + // Long small copy: less than 4 elements. + void generate_long_small_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ ld_d(A3, A0, 16); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ st_d(A3, A1, 16); + __ jr(RA); + __ nop(); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. 
The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, const char *name, + bool dest_uninitialized = false) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + gen_maybe_oop_copy(is_oop, small, large, name, 4, 3, dest_uninitialized); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, const char *name, + bool dest_uninitialized = false) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + if (is_oop) { + array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); + } else { + array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); + } + + gen_maybe_oop_copy(is_oop, small, large, name, 4, 3, dest_uninitialized); + + return start; + } + + void generate_arraycopy_stubs() { + Label disjoint_large_copy, conjoint_large_copy; + Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; + + generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy"); + generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy"); + generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); + generate_short_small_copy(short_small_copy, "jshort_small_copy"); + generate_int_small_copy(int_small_copy, "jint_small_copy"); + generate_long_small_copy(long_small_copy, "jlong_small_copy"); + + if (UseCompressedOops) { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, "oop_arraycopy_uninit", true); + } else { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, "oop_arraycopy_uninit", true); + } + + 
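+ // With compressed oops each element is a 4-byte narrow oop, so the oop stubs
+ // share the int copy generators; with full-width oops they share the long
+ // (8-byte) generators.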
StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, "jbyte_disjoint_arraycopy"); + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, "jshort_disjoint_arraycopy"); + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, "jint_disjoint_arraycopy"); + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, "jlong_disjoint_arraycopy", false); + + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, "jbyte_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, "jshort_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, "jint_arraycopy"); + StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, "jlong_arraycopy", false); + + // We don't generate specialized code for HeapWord-aligned source + // arrays, so just use the code we've already generated + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; + StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; + + StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; + StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; + + StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; + StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; + + StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; + StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; + StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; + StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; + + StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); + StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); + StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); + StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); + StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); + } + + // Arguments: + // + // Inputs: + // A0 - source byte array address + // A1 - destination byte array address + // A2 - K (key) in little endian int array + // A3 - r vector byte array address + // A4 - input length + // + // Output: + // A0 - input length + // + address generate_aescrypt_encryptBlock(bool cbc) { + static const uint32_t ft_consts[256] = { + 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, + 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, + 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, + 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, + 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, + 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, + 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, + 0x239c9cbf, 0x53a4a4f7, 
0xe4727296, 0x9bc0c05b, + 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, + 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, + 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, + 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, + 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, + 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, + 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, + 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, + 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, + 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, + 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, + 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, + 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, + 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, + 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, + 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, + 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, + 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, + 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, + 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, + 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, + 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, + 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, + 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, + 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, + 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, + 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, + 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, + 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, + 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, + 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, + 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, + 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, + 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, + 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, + 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, + 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, + 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, + 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, + 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, + 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, + 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, + 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, + 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, + 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, + 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, + 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, + 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, + 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, + 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, + 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, + 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, + 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, + 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, + 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, + 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a + }; + static const uint8_t fsb_consts[256] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, + 0x45, 0xf9, 0x02, 0x7f, 0x50, 
0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + }; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + + // Allocate registers + Register src = A0; + Register dst = A1; + Register key = A2; + Register rve = A3; + Register srclen = A4; + Register keylen = T8; + Register srcend = A5; + Register keyold = A6; + Register t0 = A7; + Register t1, t2, t3, ftp; + Register xa[4] = { T0, T1, T2, T3 }; + Register ya[4] = { T4, T5, T6, T7 }; + + Label loop, tail, done; + address start = __ pc(); + + if (cbc) { + t1 = S0; + t2 = S1; + t3 = S2; + ftp = S3; + + __ beqz(srclen, done); + + __ addi_d(SP, SP, -4 * wordSize); + __ st_d(S3, SP, 3 * wordSize); + __ st_d(S2, SP, 2 * wordSize); + __ st_d(S1, SP, 1 * wordSize); + __ st_d(S0, SP, 0 * wordSize); + + __ add_d(srcend, src, srclen); + __ move(keyold, key); + } else { + t1 = A3; + t2 = A4; + t3 = A5; + ftp = A6; + } + + __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); + + // Round 1 + if (cbc) { + for (int i = 0; i < 4; i++) { + __ ld_w(xa[i], rve, 4 * i); + } + + __ bind(loop); + + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], src, 4 * i); + } + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + } else { + for (int i = 0; i < 4; i++) { + __ ld_w(xa[i], src, 4 * i); + } + } + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], key, 4 * i); + } + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + + __ li(ftp, (intptr_t)ft_consts); + + // Round 2 - (N-1) + for (int r = 0; r < 14; r++) { + Register *xp; + Register *yp; + + if (r & 1) { + xp = xa; + yp = ya; + } else { + xp = ya; + yp = xa; + } + + for (int i = 0; i < 4; i++) { + __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); + } + + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); + __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); + __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); + __ slli_w(t0, t0, 2); + __ slli_w(t1, t1, 2); + __ slli_w(t2, t2, 2); + __ slli_w(t3, t3, 2); + __ ldx_w(t0, ftp, t0); + __ ldx_w(t1, ftp, t1); + __ ldx_w(t2, ftp, t2); + __ ldx_w(t3, ftp, t3); + __ rotri_w(t0, t0, 24); + __ rotri_w(t1, t1, 16); + __ rotri_w(t2, t2, 8); + __ XOR(xp[i], xp[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xp[i], xp[i], t3); + __ XOR(xp[i], xp[i], t0); + } + + if (r == 8) { + // AES 128 + __ li(t0, 44); + __ beq(t0, keylen, tail); + } else if (r == 10) { + // AES 192 + __ li(t0, 52); + __ beq(t0, keylen, tail); + } + } + + __ 
bind(tail); + __ li(ftp, (intptr_t)fsb_consts); + __ alsl_d(key, keylen, key, 2 - 1); + + // Round N + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); + __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); + __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); + __ ldx_bu(t0, ftp, t0); + __ ldx_bu(t1, ftp, t1); + __ ldx_bu(t2, ftp, t2); + __ ldx_bu(t3, ftp, t3); + __ ld_w(xa[i], key, 4 * i - 16); + __ slli_w(t1, t1, 8); + __ slli_w(t2, t2, 16); + __ slli_w(t3, t3, 24); + __ XOR(xa[i], xa[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xa[i], xa[i], t3); + __ XOR(xa[i], xa[i], t0); + } + + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + for (int i = 0; i < 4; i++) { + __ st_w(xa[i], dst, 4 * i); + } + + if (cbc) { + __ move(key, keyold); + __ addi_d(src, src, 16); + __ addi_d(dst, dst, 16); + __ blt(src, srcend, loop); + + for (int i = 0; i < 4; i++) { + __ st_w(xa[i], rve, 4 * i); + } + + __ ld_d(S3, SP, 3 * wordSize); + __ ld_d(S2, SP, 2 * wordSize); + __ ld_d(S1, SP, 1 * wordSize); + __ ld_d(S0, SP, 0 * wordSize); + __ addi_d(SP, SP, 4 * wordSize); + + __ bind(done); + __ move(A0, srclen); + } + + __ jr(RA); + + return start; + } + + // Arguments: + // + // Inputs: + // A0 - source byte array address + // A1 - destination byte array address + // A2 - K (key) in little endian int array + // A3 - r vector byte array address + // A4 - input length + // + // Output: + // A0 - input length + // + address generate_aescrypt_decryptBlock(bool cbc) { + static const uint32_t rt_consts[256] = { + 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, + 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, + 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, + 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, + 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, + 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, + 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, + 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, + 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, + 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, + 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, + 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, + 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, + 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, + 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, + 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, + 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, + 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, + 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, + 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, + 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, + 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, + 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, + 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, + 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, + 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, + 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, + 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, + 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, + 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, + 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, + 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, + 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, + 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, + 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, + 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, + 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, + 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, + 0x2bb3166c, 
0xa970b999, 0x119448fa, 0x47e96422, + 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, + 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, + 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, + 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, + 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, + 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, + 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, + 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, + 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, + 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, + 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, + 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, + 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, + 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, + 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, + 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, + 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, + 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, + 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, + 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, + 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, + 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, + 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, + 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, + 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 + }; + static const uint8_t rsb_consts[256] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + }; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + + // Allocate registers + Register src = A0; + Register dst = A1; + Register key = A2; + Register rve = A3; + Register srclen = A4; + Register keylen = T8; + Register srcend = A5; + Register t0 = A6; + Register t1 = A7; + Register t2, t3, rtp, rvp; + Register xa[4] = { T0, T1, T2, T3 }; + Register ya[4] = { T4, T5, T6, T7 }; + + Label loop, tail, done; + address start = __ pc(); + + if (cbc) { + t2 = S0; + t3 = S1; + rtp = S2; + rvp = S3; + + __ beqz(srclen, done); + + __ addi_d(SP, SP, -4 * wordSize); + __ 
st_d(S3, SP, 3 * wordSize); + __ st_d(S2, SP, 2 * wordSize); + __ st_d(S1, SP, 1 * wordSize); + __ st_d(S0, SP, 0 * wordSize); + + __ add_d(srcend, src, srclen); + __ move(rvp, rve); + } else { + t2 = A3; + t3 = A4; + rtp = A5; + } + + __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); + + __ bind(loop); + + // Round 1 + for (int i = 0; i < 4; i++) { + __ ld_w(xa[i], src, 4 * i); + } + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], key, 4 * (4 + i)); + } + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + + __ li(rtp, (intptr_t)rt_consts); + + // Round 2 - (N-1) + for (int r = 0; r < 14; r++) { + Register *xp; + Register *yp; + + if (r & 1) { + xp = xa; + yp = ya; + } else { + xp = ya; + yp = xa; + } + + for (int i = 0; i < 4; i++) { + __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); + } + + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, yp[(i + 1) & 3], 7, 0); + __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); + __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); + __ slli_w(t0, t0, 2); + __ slli_w(t1, t1, 2); + __ slli_w(t2, t2, 2); + __ slli_w(t3, t3, 2); + __ ldx_w(t0, rtp, t0); + __ ldx_w(t1, rtp, t1); + __ ldx_w(t2, rtp, t2); + __ ldx_w(t3, rtp, t3); + __ rotri_w(t0, t0, 24); + __ rotri_w(t1, t1, 16); + __ rotri_w(t2, t2, 8); + __ XOR(xp[i], xp[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xp[i], xp[i], t3); + __ XOR(xp[i], xp[i], t0); + } + + if (r == 8) { + // AES 128 + __ li(t0, 44); + __ beq(t0, keylen, tail); + } else if (r == 10) { + // AES 192 + __ li(t0, 52); + __ beq(t0, keylen, tail); + } + } + + __ bind(tail); + __ li(rtp, (intptr_t)rsb_consts); + + // Round N + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); + __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); + __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); + __ ldx_bu(t0, rtp, t0); + __ ldx_bu(t1, rtp, t1); + __ ldx_bu(t2, rtp, t2); + __ ldx_bu(t3, rtp, t3); + __ ld_w(xa[i], key, 4 * i); + __ slli_w(t1, t1, 8); + __ slli_w(t2, t2, 16); + __ slli_w(t3, t3, 24); + __ XOR(xa[i], xa[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xa[i], xa[i], t3); + __ XOR(xa[i], xa[i], t0); + } + + if (cbc) { + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], rvp, 4 * i); + } + } + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + if (cbc) { + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + } + for (int i = 0; i < 4; i++) { + __ st_w(xa[i], dst, 4 * i); + } + + if (cbc) { + __ move(rvp, src); + __ addi_d(src, src, 16); + __ addi_d(dst, dst, 16); + __ blt(src, srcend, loop); + + __ ld_d(t0, src, -16); + __ ld_d(t1, src, -8); + __ st_d(t0, rve, 0); + __ st_d(t1, rve, 8); + + __ ld_d(S3, SP, 3 * wordSize); + __ ld_d(S2, SP, 2 * wordSize); + __ ld_d(S1, SP, 1 * wordSize); + __ ld_d(S0, SP, 0 * wordSize); + __ addi_d(SP, SP, 4 * wordSize); + + __ bind(done); + __ move(A0, srclen); + } + + __ jr(RA); + + return start; + } + + // Arguments: + // + // Inputs: + // A0 - byte[] source+offset + // A1 - int[] SHA.state + // A2 - int offset + // A3 - int limit + // + void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + Label keys, loop; + + // Keys + __ 
bind(keys); + __ emit_int32(0x5a827999); + __ emit_int32(0x6ed9eba1); + __ emit_int32(0x8f1bbcdc); + __ emit_int32(0xca62c1d6); + + // Allocate registers + Register t0 = T5; + Register t1 = T6; + Register t2 = T7; + Register t3 = T8; + Register buf = A0; + Register state = A1; + Register ofs = A2; + Register limit = A3; + Register ka[4] = { A4, A5, A6, A7 }; + Register sa[5] = { T0, T1, T2, T3, T4 }; + + // Entry + entry = __ pc(); + __ move(ofs, R0); + __ move(limit, R0); + + // Entry MB + entry_mb = __ pc(); + + // Allocate scratch space + __ addi_d(SP, SP, -64); + + // Load keys + __ lipc(t0, keys); + __ ld_w(ka[0], t0, 0); + __ ld_w(ka[1], t0, 4); + __ ld_w(ka[2], t0, 8); + __ ld_w(ka[3], t0, 12); + + __ bind(loop); + // Load arguments + __ ld_w(sa[0], state, 0); + __ ld_w(sa[1], state, 4); + __ ld_w(sa[2], state, 8); + __ ld_w(sa[3], state, 12); + __ ld_w(sa[4], state, 16); + + // 80 rounds of hashing + for (int i = 0; i < 80; i++) { + Register a = sa[(5 - (i % 5)) % 5]; + Register b = sa[(6 - (i % 5)) % 5]; + Register c = sa[(7 - (i % 5)) % 5]; + Register d = sa[(8 - (i % 5)) % 5]; + Register e = sa[(9 - (i % 5)) % 5]; + + if (i < 16) { + __ ld_w(t0, buf, i * 4); + __ revb_2h(t0, t0); + __ rotri_w(t0, t0, 16); + __ add_w(e, e, t0); + __ st_w(t0, SP, i * 4); + __ XOR(t0, c, d); + __ AND(t0, t0, b); + __ XOR(t0, t0, d); + } else { + __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); + __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); + __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); + __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); + __ XOR(t0, t0, t1); + __ XOR(t0, t0, t2); + __ XOR(t0, t0, t3); + __ rotri_w(t0, t0, 31); + __ add_w(e, e, t0); + __ st_w(t0, SP, (i & 0xF) * 4); + + if (i < 20) { + __ XOR(t0, c, d); + __ AND(t0, t0, b); + __ XOR(t0, t0, d); + } else if (i < 40 || i >= 60) { + __ XOR(t0, b, c); + __ XOR(t0, t0, d); + } else if (i < 60) { + __ OR(t0, c, d); + __ AND(t0, t0, b); + __ AND(t2, c, d); + __ OR(t0, t0, t2); + } + } + + __ rotri_w(b, b, 2); + __ add_w(e, e, t0); + __ add_w(e, e, ka[i / 20]); + __ rotri_w(t0, a, 27); + __ add_w(e, e, t0); + } + + // Save updated state + __ ld_w(t0, state, 0); + __ ld_w(t1, state, 4); + __ ld_w(t2, state, 8); + __ ld_w(t3, state, 12); + __ add_w(sa[0], sa[0], t0); + __ ld_w(t0, state, 16); + __ add_w(sa[1], sa[1], t1); + __ add_w(sa[2], sa[2], t2); + __ add_w(sa[3], sa[3], t3); + __ add_w(sa[4], sa[4], t0); + __ st_w(sa[0], state, 0); + __ st_w(sa[1], state, 4); + __ st_w(sa[2], state, 8); + __ st_w(sa[3], state, 12); + __ st_w(sa[4], state, 16); + + __ addi_w(ofs, ofs, 64); + __ addi_d(buf, buf, 64); + __ bge(limit, ofs, loop); + __ move(V0, ofs); // return ofs + + __ addi_d(SP, SP, 64); + __ jr(RA); + } + + // Arguments: + // + // Inputs: + // A0 - byte[] source+offset + // A1 - int[] SHA.state + // A2 - int offset + // A3 - int limit + // + void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { + static const uint32_t round_consts[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, + }; + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + Label loop; + + // Allocate registers + Register t0 = A4; + Register t1 = A5; + Register t2 = A6; + Register t3 = A7; + Register buf = A0; + Register state = A1; + Register ofs = A2; + Register limit = A3; + Register kptr = T8; + Register sa[8] = { T0, T1, T2, T3, T4, T5, T6, T7 }; + + // Entry + entry = __ pc(); + __ move(ofs, R0); + __ move(limit, R0); + + // Entry MB + entry_mb = __ pc(); + + // Allocate scratch space + __ addi_d(SP, SP, -64); + + // Load keys base address + __ li(kptr, (intptr_t)round_consts); + + __ bind(loop); + // Load state + __ ld_w(sa[0], state, 0); + __ ld_w(sa[1], state, 4); + __ ld_w(sa[2], state, 8); + __ ld_w(sa[3], state, 12); + __ ld_w(sa[4], state, 16); + __ ld_w(sa[5], state, 20); + __ ld_w(sa[6], state, 24); + __ ld_w(sa[7], state, 28); + + // Do 64 rounds of hashing + for (int i = 0; i < 64; i++) { + Register a = sa[(0 - i) & 7]; + Register b = sa[(1 - i) & 7]; + Register c = sa[(2 - i) & 7]; + Register d = sa[(3 - i) & 7]; + Register e = sa[(4 - i) & 7]; + Register f = sa[(5 - i) & 7]; + Register g = sa[(6 - i) & 7]; + Register h = sa[(7 - i) & 7]; + + if (i < 16) { + __ ld_w(t1, buf, i * 4); + __ revb_2h(t1, t1); + __ rotri_w(t1, t1, 16); + } else { + __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); + __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); + __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); + __ add_w(t1, t1, t2); + __ rotri_w(t2, t0, 18); + __ srli_w(t3, t0, 3); + __ rotri_w(t0, t0, 7); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ add_w(t1, t1, t0); + __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); + __ rotri_w(t2, t0, 19); + __ srli_w(t3, t0, 10); + __ rotri_w(t0, t0, 17); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ add_w(t1, t1, t0); + } + + __ rotri_w(t2, e, 11); + __ rotri_w(t3, e, 25); + __ rotri_w(t0, e, 6); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ XOR(t2, g, f); + __ ld_w(t3, kptr, i * 4); + __ AND(t2, t2, e); + __ XOR(t2, t2, g); + __ add_w(t0, t0, t2); + __ add_w(t0, t0, t3); + __ add_w(h, h, t1); + __ add_w(h, h, t0); + __ add_w(d, d, h); + __ rotri_w(t2, a, 13); + __ rotri_w(t3, a, 22); + __ rotri_w(t0, a, 2); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ add_w(h, h, t0); + __ OR(t0, c, b); + __ AND(t2, c, b); + __ AND(t0, t0, a); + __ OR(t0, t0, t2); + __ add_w(h, h, t0); + __ st_w(t1, SP, (i & 0xF) * 4); + } + + // Add to state + __ ld_w(t0, state, 0); + __ ld_w(t1, state, 4); + __ ld_w(t2, state, 8); + __ ld_w(t3, state, 12); + __ add_w(sa[0], sa[0], t0); + __ add_w(sa[1], sa[1], t1); + __ add_w(sa[2], sa[2], t2); + __ add_w(sa[3], sa[3], t3); + __ ld_w(t0, state, 16); + __ ld_w(t1, state, 20); + __ ld_w(t2, state, 24); + __ ld_w(t3, state, 28); + __ add_w(sa[4], sa[4], t0); + __ add_w(sa[5], sa[5], t1); + __ add_w(sa[6], sa[6], t2); + __ add_w(sa[7], sa[7], t3); + __ st_w(sa[0], state, 0); + __ st_w(sa[1], state, 4); + __ st_w(sa[2], state, 8); + __ st_w(sa[3], state, 12); + __ st_w(sa[4], state, 16); + __ st_w(sa[5], state, 20); + __ st_w(sa[6], state, 24); + __ st_w(sa[7], state, 28); + + __ addi_w(ofs, ofs, 64); + __ addi_d(buf, buf, 64); + __ bge(limit, ofs, loop); + __ move(V0, ofs); // return ofs + + __ addi_d(SP, SP, 64); + __ jr(RA); + } + + // Do NOT delete this node which stands for stub routine placeholder + address generate_updateBytesCRC32() { + 
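+ // Folds 'len' bytes at 'buf' into the incoming CRC-32 value in 'crc' and
+ // returns the updated CRC; the byte/word loop itself is emitted by
+ // MacroAssembler::kernel_crc32 below.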
assert(UseCRC32Intrinsics, "need CRC32 instructions support"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); + + address start = __ pc(); + + const Register crc = A0; // crc + const Register buf = A1; // source java byte array address + const Register len = A2; // length + const Register tmp = A3; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ kernel_crc32(crc, buf, len, tmp); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ jr(RA); + + return start; + } + + // add a function to implement SafeFetch32 and SafeFetchN + void generate_safefetch(const char* name, int size, address* entry, + address* fault_pc, address* continuation_pc) { + // safefetch signatures: + // int SafeFetch32(int* adr, int errValue); + // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); + // + // arguments: + // A0 = adr + // A1 = errValue + // + // result: + // PPC_RET = *adr or errValue + StubCodeMark mark(this, "StubRoutines", name); + + // Entry point, pc or function descriptor. + *entry = __ pc(); + + // Load *adr into A1, may fault. + *fault_pc = __ pc(); + switch (size) { + case 4: + // int32_t + __ ld_w(A1, A0, 0); + break; + case 8: + // int64_t + __ ld_d(A1, A0, 0); + break; + default: + ShouldNotReachHere(); + } + + // return errValue or *adr + *continuation_pc = __ pc(); + __ add_d(V0, A1, R0); + __ jr(RA); + } + + +#undef __ +#define __ masm-> + + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Since we need to preserve callee-saved values (currently + // only for C2, but done for C1 as well) we need a callee-saved oop + // map and therefore have to make these stubs into RuntimeStubs + // rather than BufferBlobs. If the compiler needs all registers to + // be preserved between the fault point and the exception handler + // then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + address generate_throw_exception(const char* name, + address runtime_entry, + bool restore_saved_exception_pc) { + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved. 
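+ // Frame slots laid out below, from lowest address up: the thread
+ // (last_java_sp) slot, callee-saved S7..S0, the saved FP and the return
+ // address.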
+ enum layout { + thread_off, // last_java_sp + S7_off, // callee saved register sp + 1 + S6_off, // callee saved register sp + 2 + S5_off, // callee saved register sp + 3 + S4_off, // callee saved register sp + 4 + S3_off, // callee saved register sp + 5 + S2_off, // callee saved register sp + 6 + S1_off, // callee saved register sp + 7 + S0_off, // callee saved register sp + 8 + FP_off, + ret_address, + framesize + }; + + int insts_size = 2048; + int locs_size = 32; + + // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, + // NULL, NULL, NULL, false, NULL, name, false); + CodeBuffer code (name , insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + + address start = __ pc(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM +#ifndef OPT_THREAD + Register java_thread = TREG; + __ get_thread(java_thread); +#else + Register java_thread = TREG; +#endif + if (restore_saved_exception_pc) { + __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); + } + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog + __ st_d(S0, SP, S0_off * wordSize); + __ st_d(S1, SP, S1_off * wordSize); + __ st_d(S2, SP, S2_off * wordSize); + __ st_d(S3, SP, S3_off * wordSize); + __ st_d(S4, SP, S4_off * wordSize); + __ st_d(S5, SP, S5_off * wordSize); + __ st_d(S6, SP, S6_off * wordSize); + __ st_d(S7, SP, S7_off * wordSize); + + int frame_complete = __ pc() - start; + // push java thread (becomes first argument of C function) + __ st_d(java_thread, SP, thread_off * wordSize); + if (java_thread != A0) + __ move(A0, java_thread); + + // Set up last_Java_sp and last_Java_fp + Label before_call; + address the_pc = __ pc(); + __ bind(before_call); + __ set_last_Java_frame(java_thread, SP, FP, before_call); + // Align stack + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + // Call runtime + // TODO: confirm reloc + __ call(runtime_entry, relocInfo::runtime_call_type); + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + oop_maps->add_gc_map(the_pc - start, map); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. +#ifndef OPT_THREAD + __ get_thread(java_thread); +#endif + + __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + __ reset_last_Java_frame(java_thread, true); + + // Restore callee save registers. 
This must be done after resetting the Java frame + __ ld_d(S0, SP, S0_off * wordSize); + __ ld_d(S1, SP, S1_off * wordSize); + __ ld_d(S2, SP, S2_off * wordSize); + __ ld_d(S3, SP, S3_off * wordSize); + __ ld_d(S4, SP, S4_off * wordSize); + __ ld_d(S5, SP, S5_off * wordSize); + __ ld_d(S6, SP, S6_off * wordSize); + __ ld_d(S7, SP, S7_off * wordSize); + + // discard arguments + __ move(SP, FP); // epilog + __ pop(FP); + // check for pending exceptions +#ifdef ASSERT + Label L; + __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ should_not_reach_here(); + __ bind(L); +#endif //ASSERT + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + + RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, + &code, + frame_complete, + framesize, + oop_maps, false); + return stub->entry_point(); + } + + class MontgomeryMultiplyGenerator : public MacroAssembler { + + Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, + Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; + + bool _squaring; + + public: + MontgomeryMultiplyGenerator (Assembler *as, bool squaring) + : MacroAssembler(as->code()), _squaring(squaring) { + + // Register allocation + + Register reg = A0; + Pa_base = reg; // Argument registers: + if (squaring) + Pb_base = Pa_base; + else + Pb_base = ++reg; + Pn_base = ++reg; + Rlen = ++reg; + inv = ++reg; + Rlen2 = inv; // Reuse inv + Pm_base = ++reg; + + // Working registers: + Ra = ++reg; // The current digit of a, b, n, and m. + Rb = ++reg; + Rm = ++reg; + Rn = ++reg; + + Iam = ++reg; // Index to the current/next digit of a, b, n, and m. + Ibn = ++reg; + + t0 = ++reg; // Three registers which form a + t1 = ++reg; // triple-precision accumuator. + t2 = ++reg; + + Ri = ++reg; // Inner and outer loop indexes. + Rj = ++reg; + + if (squaring) { + Rhi_ab = ++reg; // Product registers: low and high parts + reg = S0; + Rlo_ab = ++reg; // of a*b and m*n. + } else { + reg = S0; + Rhi_ab = reg; // Product registers: low and high parts + Rlo_ab = ++reg; // of a*b and m*n. 
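+ // When not squaring, Pb_base claimed one more argument register above, so
+ // both halves of the a*b product (Rhi_ab/Rlo_ab) must live in callee-saved
+ // S0/S1 (see save_regs()/restore_regs()).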
+ } + + Rhi_mn = ++reg; + Rlo_mn = ++reg; + } + + private: + void enter() { + addi_d(SP, SP, -6 * wordSize); + st_d(FP, SP, 0 * wordSize); + move(FP, SP); + } + + void leave() { + addi_d(T0, FP, 6 * wordSize); + ld_d(FP, FP, 0 * wordSize); + move(SP, T0); + } + + void save_regs() { + if (!_squaring) + st_d(Rhi_ab, FP, 5 * wordSize); + st_d(Rlo_ab, FP, 4 * wordSize); + st_d(Rhi_mn, FP, 3 * wordSize); + st_d(Rlo_mn, FP, 2 * wordSize); + st_d(Pm_base, FP, 1 * wordSize); + } + + void restore_regs() { + if (!_squaring) + ld_d(Rhi_ab, FP, 5 * wordSize); + ld_d(Rlo_ab, FP, 4 * wordSize); + ld_d(Rhi_mn, FP, 3 * wordSize); + ld_d(Rlo_mn, FP, 2 * wordSize); + ld_d(Pm_base, FP, 1 * wordSize); + } + + template + void unroll_2(Register count, T block, Register tmp) { + Label loop, end, odd; + andi(tmp, count, 1); + bnez(tmp, odd); + beqz(count, end); + align(16); + bind(loop); + (this->*block)(); + bind(odd); + (this->*block)(); + addi_w(count, count, -2); + blt(R0, count, loop); + bind(end); + } + + template + void unroll_2(Register count, T block, Register d, Register s, Register tmp) { + Label loop, end, odd; + andi(tmp, count, 1); + bnez(tmp, odd); + beqz(count, end); + align(16); + bind(loop); + (this->*block)(d, s, tmp); + bind(odd); + (this->*block)(d, s, tmp); + addi_w(count, count, -2); + blt(R0, count, loop); + bind(end); + } + + void acc(Register Rhi, Register Rlo, + Register t0, Register t1, Register t2, Register t, Register c) { + add_d(t0, t0, Rlo); + OR(t, t1, Rhi); + sltu(c, t0, Rlo); + add_d(t1, t1, Rhi); + add_d(t1, t1, c); + sltu(c, t1, t); + add_d(t2, t2, c); + } + + void pre1(Register i) { + block_comment("pre1"); + // Iam = 0; + // Ibn = i; + + slli_w(Ibn, i, LogBytesPerWord); + + // Ra = Pa_base[Iam]; + // Rb = Pb_base[Ibn]; + // Rm = Pm_base[Iam]; + // Rn = Pn_base[Ibn]; + + ld_d(Ra, Pa_base, 0); + ldx_d(Rb, Pb_base, Ibn); + ld_d(Rm, Pm_base, 0); + ldx_d(Rn, Pn_base, Ibn); + + move(Iam, R0); + + // Zero the m*n result. + move(Rhi_mn, R0); + move(Rlo_mn, R0); + } + + // The core multiply-accumulate step of a Montgomery + // multiplication. The idea is to schedule operations as a + // pipeline so that instructions with long latencies (loads and + // multiplies) have time to complete before their results are + // used. This most benefits in-order implementations of the + // architecture but out-of-order ones also benefit. + void step() { + block_comment("step"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + addi_d(Iam, Iam, wordSize); + addi_d(Ibn, Ibn, -wordSize); + mul_d(Rlo_ab, Ra, Rb); + mulh_du(Rhi_ab, Ra, Rb); + acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the + // previous iteration. 
+ ldx_d(Ra, Pa_base, Iam); + ldx_d(Rb, Pb_base, Ibn); + + // MACC(Rm, Rn, t0, t1, t2); + // Rm = Pm_base[Iam]; + // Rn = Pn_base[Ibn]; + mul_d(Rlo_mn, Rm, Rn); + mulh_du(Rhi_mn, Rm, Rn); + acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); + ldx_d(Rm, Pm_base, Iam); + ldx_d(Rn, Pn_base, Ibn); + } + + void post1() { + block_comment("post1"); + + // MACC(Ra, Rb, t0, t1, t2); + mul_d(Rlo_ab, Ra, Rb); + mulh_du(Rhi_ab, Ra, Rb); + acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n + acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); + + // Pm_base[Iam] = Rm = t0 * inv; + mul_d(Rm, t0, inv); + stx_d(Rm, Pm_base, Iam); + + // MACC(Rm, Rn, t0, t1, t2); + // t0 = t1; t1 = t2; t2 = 0; + mulh_du(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); + { + mul_d(Rlo_mn, Rm, Rn); + add_d(Rlo_mn, t0, Rlo_mn); + Label ok; + beqz(Rlo_mn, ok); { + stop("broken Montgomery multiply"); + } bind(ok); + } +#endif + + // We have very carefully set things up so that + // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -t0. t0 + (-t0) must generate a carry iff + // t0 != 0. So, rather than do a mul and an adds we just set + // the carry flag iff t0 is nonzero. + // + // mul_d(Rlo_mn, Rm, Rn); + // add_d(t0, t0, Rlo_mn); + OR(Ra, t1, Rhi_mn); + sltu(Rb, R0, t0); + add_d(t0, t1, Rhi_mn); + add_d(t0, t0, Rb); + sltu(Rb, t0, Ra); + add_d(t1, t2, Rb); + move(t2, R0); + } + + void pre2(Register i, Register len) { + block_comment("pre2"); + + // Rj == i-len + sub_w(Rj, i, len); + + // Iam = i - len; + // Ibn = len; + slli_w(Iam, Rj, LogBytesPerWord); + slli_w(Ibn, len, LogBytesPerWord); + + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + addi_d(Iam, Iam, wordSize); + addi_d(Ibn, Ibn, -wordSize); + + ldx_d(Ra, Pa_base, Iam); + ldx_d(Rb, Pb_base, Ibn); + ldx_d(Rm, Pm_base, Iam); + ldx_d(Rn, Pn_base, Ibn); + + move(Rhi_mn, R0); + move(Rlo_mn, R0); + } + + void post2(Register i, Register len) { + block_comment("post2"); + + sub_w(Rj, i, len); + slli_w(Iam, Rj, LogBytesPerWord); + + add_d(t0, t0, Rlo_mn); // The pending m*n, low part + + // As soon as we know the least significant digit of our result, + // store it. + // Pm_base[i-len] = t0; + stx_d(t0, Pm_base, Iam); + + // t0 = t1; t1 = t2; t2 = 0; + OR(Ra, t1, Rhi_mn); + sltu(Rb, t0, Rlo_mn); + add_d(t0, t1, Rhi_mn); // The pending m*n, high part + add_d(t0, t0, Rb); + sltu(Rb, t0, Ra); + add_d(t1, t2, Rb); + move(t2, R0); + } + + // A carry in t0 after Montgomery multiplication means that we + // should subtract multiples of n from our result in m. We'll + // keep doing that until there is no carry. + void normalize(Register len) { + block_comment("normalize"); + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + Label loop, post, again; + Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now + beqz(t0, post); { + bind(again); { + move(i, R0); + move(b, R0); + slli_w(cnt, len, LogBytesPerWord); + align(16); + bind(loop); { + ldx_d(Rm, Pm_base, i); + ldx_d(Rn, Pn_base, i); + sltu(t, Rm, b); + sub_d(Rm, Rm, b); + sltu(b, Rm, Rn); + sub_d(Rm, Rm, Rn); + OR(b, b, t); + stx_d(Rm, Pm_base, i); + addi_w(i, i, BytesPerWord); + } blt(i, cnt, loop); + sub_d(t0, t0, b); + } bnez(t0, again); + } bind(post); + } + + // Move memory at s to d, reversing words. 
+ // Increments d to end of copied memory + // Destroys tmp1, tmp2, tmp3 + // Preserves len + // Leaves s pointing to the address which was in d at start + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < S0 && tmp2 < S0, "register corruption"); + + alsl_d(s, len, s, LogBytesPerWord - 1); + move(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + slli_w(s, len, LogBytesPerWord); + sub_d(s, d, s); + } + + // where + void reverse1(Register d, Register s, Register tmp) { + ld_d(tmp, s, -wordSize); + addi_d(s, s, -wordSize); + addi_d(d, d, wordSize); + rotri_d(tmp, tmp, 32); + st_d(tmp, d, -wordSize); + } + + public: + /** + * Fast Montgomery multiplication. The derivation of the + * algorithm is in A Cryptographic Library for the Motorola + * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. + * + * Arguments: + * + * Inputs for multiplication: + * A0 - int array elements a + * A1 - int array elements b + * A2 - int array elements n (the modulus) + * A3 - int length + * A4 - int inv + * A5 - int array elements m (the result) + * + * Inputs for squaring: + * A0 - int array elements a + * A1 - int array elements n (the modulus) + * A2 - int length + * A3 - int inv + * A4 - int array elements m (the result) + * + */ + address generate_multiply() { + Label argh, nothing; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + beqz(Rlen, nothing); + + enter(); + + // Make room. + sltui(Ra, Rlen, 513); + beqz(Ra, argh); + slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); + sub_d(Ra, SP, Ra); + + srli_w(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, t0, t1); + if (!_squaring) + reverse(Ra, Pb_base, Rlen, t0, t1); + reverse(Ra, Pn_base, Rlen, t0, t1); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. 
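+    // After enter() and save_regs() the six-word frame looks like this:
+    //
+    //   FP + 5*wordSize : Rhi_ab   (skipped when squaring)
+    //   FP + 4*wordSize : Rlo_ab
+    //   FP + 3*wordSize : Rhi_mn
+    //   FP + 2*wordSize : Rlo_mn
+    //   FP + 1*wordSize : Pm_base  (the caller's result array; restored before
+    //                               the final reverse() back into it)
+    //   FP + 0          : saved FP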
+ save_regs(); + +#ifndef PRODUCT + // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + { + ld_d(Rn, Pn_base, 0); + li(t0, -1); + mul_d(Rlo_mn, Rn, inv); + Label ok; + beq(Rlo_mn, t0, ok); { + stop("broken inverse in Montgomery multiply"); + } bind(ok); + } +#endif + + move(Pm_base, Ra); + + move(t0, R0); + move(t1, R0); + move(t2, R0); + + block_comment("for (int i = 0; i < len; i++) {"); + move(Ri, R0); { + Label loop, end; + bge(Ri, Rlen, end); + + bind(loop); + pre1(Ri); + + block_comment(" for (j = i; j; j--) {"); { + move(Rj, Ri); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); + } block_comment(" } // j"); + + post1(); + addi_w(Ri, Ri, 1); + blt(Ri, Rlen, loop); + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + move(Ri, Rlen); + slli_w(Rlen2, Rlen, 1); { + Label loop, end; + bge(Ri, Rlen2, end); + + bind(loop); + pre2(Ri, Rlen); + + block_comment(" for (j = len*2-i-1; j; j--) {"); { + sub_w(Rj, Rlen2, Ri); + addi_w(Rj, Rj, -1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addi_w(Ri, Ri, 1); + blt(Ri, Rlen2, loop); + bind(end); + } + block_comment("} // i"); + + normalize(Rlen); + + move(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, t0, t1); + + leave(); + bind(nothing); + jr(RA); + + return entry; + } + // In C, approximately: + + // void + // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], + // unsigned long Pn_base[], unsigned long Pm_base[], + // unsigned long inv, int len) { + // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + // unsigned long Ra, Rb, Rn, Rm; + // int i, Iam, Ibn; + + // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); + + // for (i = 0; i < len; i++) { + // int j; + + // Iam = 0; + // Ibn = i; + + // Ra = Pa_base[Iam]; + // Rb = Pb_base[Iam]; + // Rm = Pm_base[Ibn]; + // Rn = Pn_base[Ibn]; + + // int iters = i; + // for (j = 0; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = Pa_base[++Iam]; + // Rb = pb_base[--Ibn]; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + // } + + // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Pm_base[Iam] = Rm = t0 * inv; + // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + + // assert(t0 == 0, "broken Montgomery multiply"); + + // t0 = t1; t1 = t2; t2 = 0; + // } + + // for (i = len; i < 2*len; i++) { + // int j; + + // Iam = i - len; + // Ibn = len; + + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + + // int iters = len*2-i-1; + // for (j = i-len+1; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + // } + + // Pm_base[i-len] = t0; + // t0 = t1; t1 = t2; t2 = 0; + // } + + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + // } + }; + + // Initialization + void generate_initial() { + // Generates all stubs and initializes 
the entry points + + //------------------------------------------------------------- + //----------------------------------------------------------- + // entry points that exist in all platforms + // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller + // than the disadvantage of having a much more complicated generator structure. + // See also comment in stubRoutines.hpp. + StubRoutines::_forward_exception_entry = generate_forward_exception(); + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); + // is referenced by megamorphic call + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); + + StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); + } + + void generate_all() { + // Generates all stubs and initializes the entry points + + // These entry points require SharedInfo::stack0 to be set up in + // non-core builds and need to be relocatable, so they each + // fabricate a RuntimeStub internally. + StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); + + StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); + + StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); + + // entry points that are platform specific + + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); +#ifndef CORE + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); +#endif + + // Safefetch stubs. + generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, + &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, false /* squaring */); + StubRoutines::_montgomeryMultiply = g.generate_multiply(); + } + + if (UseMontgomerySquareIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); + MontgomeryMultiplyGenerator g(_masm, true /* squaring */); + // We use generate_multiply() rather than generate_square() + // because it's faster for the sizes of modulus we care about. 
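+      // (The generator was constructed with squaring == true, so
+      // generate_multiply() skips the b-array reversal and the Rhi_ab spill in
+      // save_regs()/restore_regs(); the rest of the code path is shared with
+      // the multiply stub.)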
+ StubRoutines::_montgomerySquare = g.generate_multiply(); + } + + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); + } + + if (UseSHA1Intrinsics) { + generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); + } + + if (UseSHA256Intrinsics) { + generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); + } + + if (UseCRC32Intrinsics) { + // set table address before stub generation which use it + StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; + StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); + } + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + if (all) { + generate_all(); + } else { + generate_initial(); + } + } +}; // end class declaration + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} diff --git a/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp new file mode 100644 index 00000000000..f0f3d55a4ea --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" + +// a description of how to extend it, see the stubRoutines.hpp file. 
+ +//find the last fp value +address StubRoutines::la::_call_stub_compiled_return = NULL; + +/** + * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h + */ +juint StubRoutines::la::_crc_table[] = +{ + // Table 0 + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 
0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL, + + // Table 1 + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 
0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL, + + // Table 2 + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 
0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL, + + // Table 3 + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 
0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL, + // Constants for Neon CRC232 implementation + // k3 = 0x78ED02D5 = x^288 mod poly - bit reversed + // k4 = 0xED627DAE = x^256 mod poly - bit reversed + 0x78ED02D5UL, 0xED627DAEUL, // k4:k3 + 0xED78D502UL, 0x62EDAE7DUL, // byte swap + 0x02D578EDUL, 0x7DAEED62UL, // word swap + 0xD502ED78UL, 0xAE7D62EDUL, // byte swap of word swap +}; diff --git a/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp new file mode 100644 index 00000000000..d020a527e49 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP +#define CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. 
+ +static bool returns_to_call_stub(address return_pc){ + return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); +} + +enum platform_dependent_constants { + code_size1 = 20000, // simply increase if too small (assembler will crash if too small) + code_size2 = 60000 // simply increase if too small (assembler will crash if too small) +}; + +class la { + friend class StubGenerator; + friend class VMStructs; + private: + // If we call compiled code directly from the call stub we will + // need to adjust the return back to the call stub to a specialized + // piece of code that can handle compiled results and cleaning the fpu + // stack. The variable holds that location. + static address _call_stub_compiled_return; + static juint _crc_table[]; + +public: + // Call back points for traps in compiled code + static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } + static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } + +}; + +#endif // CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp new file mode 100644 index 00000000000..213e69b0b21 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP + + protected: + + void generate_fixed_frame(bool native_call); + + // address generate_asm_interpreter_entry(bool synchronized); + +#endif // CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp new file mode 100644 index 00000000000..39e3ad7bb57 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP + + + protected: + + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreter to get the VM to print out the size. + // Max size with JVMTI + // The sethi() instruction generates lots more instructions when shell + // stack limit is unlimited, so that's why this is much bigger. + const static int InterpreterCodeSize = 500 * K; + +#endif // CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp new file mode 100644 index 00000000000..b25086a3997 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp @@ -0,0 +1,2335 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#ifndef CC_INTERP + +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + + // fixed size of an interpreter frame: + int overhead = frame::sender_sp_offset - + frame::interpreter_frame_initial_sp_offset; + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transistion) Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals. + int size = overhead + + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + extra_args; + + return size; +} + + +const int Interpreter::return_sentinel = 0xfeedbeed; +const int method_offset = frame::interpreter_frame_method_offset * wordSize; +const int bci_offset = frame::interpreter_frame_bcx_offset * wordSize; +const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + +#ifdef ASSERT + { + Label L; + __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp + __ bge(T1, R0, L); // check if frame is complete + __ stop("interpreter frame not set up"); + __ bind(L); + } +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted + // FIXME: please change the func restore_bcp + // S0 is the conventional register for bcp + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception + // FIXME: why do not pass parameter thread ? 
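+  // (call_VM loads the current thread into the expected argument register
+  // itself, so no explicit thread parameter is needed here.)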
+ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( + const char* name) { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + __ li(A1, (long)name); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + // setup parameters + __ li(A1, (long)name); + if (pass_oop) { + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); + } else { + __ li(A2, (long)message); + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); + } + // throw exception + __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); + return entry; +} + + +address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { + address entry = __ pc(); + // NULL last_sp until next java call + __ st_d(R0,Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + __ dispatch_next(state); + return entry; +} + + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + address entry = __ pc(); + + // Restore stack bottom in case i2c adjusted stack + __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + // and NULL it as marker that sp is now tos until next java call + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + __ restore_bcp(); + __ restore_locals(); + + // mdp: T8 + // ret: FSR + // tmp: T4 + if (state == atos) { + Register mdp = T8; + Register tmp = T4; + __ profile_return_type(mdp, FSR, tmp); + } + + + const Register cache = T4; + const Register index = T3; + __ get_cache_and_index_at_bcp(cache, index, 1, index_size); + + const Register flags = cache; + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); + __ alsl_d(SP, flags, SP, Interpreter::stackElementScale() - 1); + + __ dispatch_next(state, step); + + return entry; +} + + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, + int step) { + address entry = __ pc(); + // NULL last_sp until next java call + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ restore_bcp(); + __ restore_locals(); + // handle exceptions + { + Label L; + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ 
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + __ dispatch_next(state, step); + return entry; +} + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : // fall through + case T_LONG : // fall through + case T_VOID : i = 4; break; + case T_FLOAT : i = 5; break; + case T_DOUBLE : i = 6; break; + case T_OBJECT : // fall through + case T_ARRAY : i = 7; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + + +address TemplateInterpreterGenerator::generate_result_handler_for( + BasicType type) { + address entry = __ pc(); + switch (type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : /* nothing to do */ break; + case T_FLOAT : /* nothing to do */ break; + case T_DOUBLE : /* nothing to do */ break; + case T_OBJECT : + { + __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ verify_oop(V0); // and verify it + } + break; + default : ShouldNotReachHere(); + } + __ jr(RA); // return from result handler + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for( + TosState state, + address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} + + + +// Helpers for commoning out cases in the various type of method entries. +// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// Rmethod: method +// T3 : invocation counter +// +void InterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { + Label done; + if (TieredCompilation) { + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + Label no_mdo; + if (ProfileInterpreter) { + // Are we profiling? 
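+      // (Method::_method_data is allocated lazily, so a null MDO just means
+      // this method has not been profiled yet; in that case we branch to the
+      // MethodCounters-based counter at no_mdo below.)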
+ __ ld_d(FSR, Address(Rmethod, Method::method_data_offset())); + __ beq(FSR, R0, no_mdo); + // Increment counter in the MDO + const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); + __ beq(R0, R0, done); + } + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(FSR, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + __ get_method_counters(Rmethod, FSR, done); + __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); + __ bind(done); + } else { + const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + + __ get_method_counters(Rmethod, FSR, done); + + if (ProfileInterpreter) { // %%% Merge this into methodDataOop + __ ld_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + __ addi_d(T4, T4, 1); + __ st_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + } + // Update standard invocation counters + __ ld_w(T3, invocation_counter); + __ increment(T3, InvocationCounter::count_increment); + __ st_w(T3, invocation_counter); // save invocation count + + __ ld_w(FSR, backedge_counter); // load backedge counter + __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits + __ andr(FSR, FSR, AT); + + __ add_d(T3, T3, FSR); // add both counters + + if (ProfileInterpreter && profile_method != NULL) { + // Test to see if we should create a method data oop + if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { + __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); + __ bne_far(AT, R0, *profile_method_continue); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ ld_w(AT, AT, 0); + __ blt_far(T3, AT, *profile_method_continue, true /* signed */); + } + + // if no method data exists, go to profile_method + __ test_method_data_pointer(FSR, *profile_method); + } + + if (Assembler::is_simm(CompileThreshold, 12)) { + __ srli_w(AT, T3, InvocationCounter::count_shift); + __ slti(AT, AT, CompileThreshold); + __ beq_far(AT, R0, *overflow); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); + __ ld_w(AT, AT, 0); + __ bge_far(T3, AT, *overflow, true /* signed */); + } + + __ bind(done); + } +} + +void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { + + // Asm interpreter on entry + // S7 - locals + // S0 - bcp + // Rmethod - method + // FP - interpreter frame + + // On return (i.e. jump to entry_point) + // Rmethod - method + // RA - return address of interpreter caller + // tos - the last parameter to Java method + // SP - sender_sp + + // the bcp is valid if and only if it's not null + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), R0); + __ ld_d(Rmethod, FP, method_offset); + // Preserve invariant that S0/S7 contain bcp/locals of sender frame + __ b_far(*do_continue); +} + +// See if we've got enough room on the stack for locals plus overhead. 
+// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. +// +// NOTE: Since the additional locals are also always pushed (wasn't +// obvious in generate_method_entry) so the guard should work for them +// too. +// +// Args: +// T2: number of additional locals this frame needs (what we must check) +// T0: Method* +// +void InterpreterGenerator::generate_stack_overflow_check(void) { + // see if we've got enough room on the stack for locals plus overhead. + // the expression stack grows down incrementally, so the normal guard + // page mechanism will work for that. + // + // Registers live on entry: + // + // T0: Method* + // T2: number of additional locals this frame needs (what we must check) + + // NOTE: since the additional locals are also always pushed (wasn't obvious in + // generate_method_entry) so the guard should work for them too. + // + + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved fp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) + + entry_size; + + const int page_size = os::vm_page_size(); + Label after_frame_check; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. + __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize); + __ bge(AT, T2, after_frame_check); + + // compute sp as if this were going to be the last frame on + // the stack before the red zone +#ifndef OPT_THREAD + Register thread = T1; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + + // locals + overhead, in bytes + __ slli_d(T3, T2, Interpreter::stackElementScale()); + __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 + +#ifdef ASSERT + Label stack_base_okay, stack_size_okay; + // verify that thread stack base is non-zero + __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); + __ bne(AT, R0, stack_base_okay); + __ stop("stack base is zero"); + __ bind(stack_base_okay); + // verify that thread stack size is non-zero + __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); + __ bne(AT, R0, stack_size_okay); + __ stop("stack size is zero"); + __ bind(stack_size_okay); +#endif + + // Add stack base to locals and subtract stack size + __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT + __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 + __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT + __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 + + + // add in the redzone and yellow size + __ li(AT, (StackRedPages+StackYellowPages) * page_size); + __ add_d(T3, T3, AT); + + // check against the current stack bottom + __ blt(T3, SP, after_frame_check); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. 
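+  // In effect the code above (for frames larger than a page; small frames
+  // took the early branch) computed
+  //
+  //   limit = stack_base - stack_size
+  //           + (StackRedPages + StackYellowPages) * page_size
+  //           + additional_locals_in_bytes + overhead_size
+  //
+  // and we only reach the throw below when SP <= limit, i.e. when the new
+  // frame would reach down into the guard pages.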
+ __ move(SP, Rsender); + assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); + + // all done with frame size check + __ bind(after_frame_check); +} + +// Allocate monitor and lock method (asm interpreter) +// Rmethod - Method* +void InterpreterGenerator::lock_method(void) { + // synchronize method + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + +#ifdef ASSERT + { Label L; + __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); + __ bne(T0, R0, L); + __ stop("method doesn't need synchronization"); + __ bind(L); + } +#endif // ASSERT + // get synchronization object + { + Label done; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, T0, JVM_ACC_STATIC); + __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0)); + __ beq(T2, R0, done); + __ ld_d(T0, Rmethod, in_bytes(Method::const_offset())); + __ ld_d(T0, T0, in_bytes(ConstMethod::constants_offset())); + __ ld_d(T0, T0, ConstantPool::pool_holder_offset_in_bytes()); + __ ld_d(T0, T0, mirror_offset); + __ bind(done); + } + // add space for monitor & lock + __ addi_d(SP, SP, (-1) * entry_size); // add space for a monitor entry + __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // set new monitor block top + __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object + // FIXME: I do not know what lock_object will do and what it will need + __ move(c_rarg0, SP); // object address + __ lock_object(c_rarg0); +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address InterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod: Method* + // Rsender: senderSP must preserved for slow path + // SP: args + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + __ li(AT, SafepointSynchronize::_not_synchronized); + __ li(T8, (long)SafepointSynchronize::address_of_state()); + __ bne(T8, AT, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. 
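+    // What the fast path below computes, roughly (update_byte_crc32 is the
+    // usual table-driven single-byte step over StubRoutines::la::_crc_table):
+    //
+    //   crc = ~crc;
+    //   crc = crc_table[(crc ^ val) & 0xff] ^ (crc >> 8);   // crc, val as juint
+    //   return ~crc;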
+ + const Register crc = A0; // crc + const Register val = A1; // source java byte value + const Register tbl = A2; // scratch + + // Arguments are reversed on java expression stack + __ ld_w(val, SP, 0); // byte value + __ ld_w(crc, SP, wordSize); // Initial CRC + + __ li(tbl, (long)StubRoutines::crc_table_addr()); + + __ nor(crc, crc, R0); // ~crc + __ update_byte_crc32(crc, val, tbl); + __ nor(crc, crc, R0); // ~crc + + // restore caller SP + __ move(SP, Rsender); + __ jr(RA); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + + (void) generate_native_entry(false); + + return entry; + } + return generate_native_entry(false); +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod: Method* + // Rsender: senderSP must preserved for slow path + // SP: args + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + __ li(AT, SafepointSynchronize::_not_synchronized); + __ li(T8, (long)SafepointSynchronize::address_of_state()); + __ bne(T8, AT, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + const Register crc = A0; // crc + const Register buf = A1; // source java byte array address + const Register len = A2; // length + const Register tmp = A3; + + const Register off = len; // offset (never overlaps with 'len') + + // Arguments are reversed on java expression stack + // Calculate address of start element + __ ld_w(off, SP, wordSize); // int offset + __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf + __ add_d(buf, buf, off); // + offset + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ ld_w(crc, SP, 4 * wordSize); // long crc + } else { + __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ ld_w(crc, SP, 3 * wordSize); // long crc + } + + // Can now load 'len' since we're finished with 'off' + __ ld_w(len, SP, 0); // length + + __ kernel_crc32(crc, buf, len, tmp); + + // restore caller SP + __ move(SP, Rsender); + __ jr(RA); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + + (void) generate_native_entry(false); + + return entry; + } + return generate_native_entry(false); +} + +// Generate a fixed interpreter frame. This is identical setup for +// interpreted methods and for native methods hence the shared code. +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- T0(sender's sp) + // ... 
+ // [ argument word 0 ] <--- S7 + + // initialize fixed part of activation frame + // sender's sp in Rsender + int i = 0; + int frame_size = 9; +#ifndef CORE + ++frame_size; +#endif + __ addi_d(SP, SP, (-frame_size) * wordSize); + __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address + __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp + __ addi_d(FP, SP, (frame_size - 2) * wordSize); + __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp + __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null + __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset + __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop + __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase + __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* +#ifndef CORE + if (ProfileInterpreter) { + Label method_data_continue; + __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); + __ beq(AT, R0, method_data_continue); + __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); + __ bind(method_data_continue); + __ st_d(AT, FP, (-++i) * wordSize); + } else { + __ st_d(R0, FP, (-++i) * wordSize); + } +#endif // !CORE + + __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); + __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache + if (native_call) { + __ st_d(R0, FP, (-++i) * wordSize); // no bcp + } else { + __ st_d(BCP, FP, (-++i) * wordSize); // set bcp + } + __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom + assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); +} + +// End of helpers + +// Various method entries +//------------------------------------------------------------------------------------------------------------------------ +// +// + +// Call an accessor method (assuming it is resolved, otherwise drop +// into vanilla (slow path) entry +address InterpreterGenerator::generate_accessor_entry(void) { + // Rmethod: Method* + // V0: receiver (preserve for slow entry into asm interpreter) + // Rsender: senderSP must preserved for slow path, set SP to it on fast path + + address entry_point = __ pc(); + Label xreturn_path; + // do fastpath for resolved accessor methods + if (UseFastAccessorMethods) { + Label slow_path; + __ li(T2, SafepointSynchronize::address_of_state()); + __ ld_w(AT, T2, 0); + __ addi_d(AT, AT, -(SafepointSynchronize::_not_synchronized)); + __ bne(AT, R0, slow_path); + // Code: _aload_0, _(i|a)getfield, _(i|a)return or any rewrites thereof; + // parameter size = 1 + // Note: We can only use this code if the getfield has been resolved + // and if we don't have a null-pointer exception => check for + // these conditions first and use slow path if necessary. + // Rmethod: method + // V0: receiver + + // [ receiver ] <-- sp + __ ld_d(T0, SP, 0); + + // check if local 0 != NULL and read field + __ beq(T0, R0, slow_path); + __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); + // read first instruction word and extract bytecode @ 1 and index @ 2 + __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); + __ ld_w(T3, T3, in_bytes(ConstMethod::codes_offset())); + // Shift codes right to get the index on the right. 
+ // The bytecode fetched looks like <0xb4><0x2a> + __ srli_d(T3, T3, 2 * BitsPerByte); + // FIXME: maybe it's wrong + __ slli_d(T3, T3, exact_log2(in_words(ConstantPoolCacheEntry::size()))); + __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); + + // T0: local 0 + // Rmethod: method + // V0: receiver - do not destroy since it is needed for slow path! + // T1: scratch use which register instead ? + // T3: constant pool cache index + // T2: constant pool cache + // Rsender: send's sp + // check if getfield has been resolved and read constant pool cache entry + // check the validity of the cache entry by testing whether _indices field + // contains Bytecode::_getfield in b1 byte. + assert(in_words(ConstantPoolCacheEntry::size()) == 4, "adjust shift below"); + + __ slli_d(T8, T3, Address::times_8); + __ li(T1, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::indices_offset())); + __ add_d(T1, T8, T1); + __ ldx_w(T1, T1, T2); + __ srli_d(T1, T1, 2 * BitsPerByte); + __ andi(T1, T1, 0xFF); + __ addi_d(T1, T1, (-1) * Bytecodes::_getfield); + __ bne(T1, R0, slow_path); + + // Note: constant pool entry is not valid before bytecode is resolved + + __ li(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + __ add_d(T1, T1, T8); + __ ldx_w(AT, T1, T2); + + __ li(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ add_d(T1, T1, T8); + __ ldx_w(T3, T1, T2); + + Label notByte, notBool, notShort, notChar, notObj; + + // Need to differentiate between igetfield, agetfield, bgetfield etc. + // because they are different sizes. + // Use the type from the constant pool cache + __ srli_w(T3, T3, ConstantPoolCacheEntry::tos_state_shift); + // Make sure we don't need to mask T3 for tosBits after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + // btos = 0 + __ add_d(T0, T0, AT); + __ bne(T3, R0, notByte); + + __ ld_b(V0, T0, 0); + __ b(xreturn_path); + + //ztos + __ bind(notByte); + __ addi_d(T1, T3, (-1) * ztos); + __ bne(T1, R0, notBool); + __ ld_b(V0, T0, 0); + __ b(xreturn_path); + + //stos + __ bind(notBool); + __ addi_d(T1, T3, (-1) * stos); + __ bne(T1, R0, notShort); + __ ld_h(V0, T0, 0); + __ b(xreturn_path); + + //ctos + __ bind(notShort); + __ addi_d(T1, T3, (-1) * ctos); + __ bne(T1, R0, notChar); + __ ld_hu(V0, T0, 0); + __ b(xreturn_path); + + //atos + __ bind(notChar); + __ addi_d(T1, T3, (-1) * atos); + __ bne(T1, R0, notObj); + //add for compressedoops + __ load_heap_oop(V0, Address(T0, 0)); + __ b(xreturn_path); + + //itos + __ bind(notObj); +#ifdef ASSERT + Label okay; + __ addi_d(T1, T3, (-1) * itos); + __ beq(T1, R0, okay); + __ stop("what type is this?"); + __ bind(okay); +#endif // ASSERT + __ ld_w(V0, T0, 0); + + __ bind(xreturn_path); + + // _ireturn/_areturn + //FIXME + __ move(SP, Rsender);//FIXME, set sender's fp to SP + __ jr(RA); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + (void) generate_normal_entry(false); + } else { + (void) generate_normal_entry(false); + } + return entry_point; +} + +// Method entry for java.lang.ref.Reference.get. +address InterpreterGenerator::generate_Reference_get_entry(void) { +#if INCLUDE_ALL_GCS + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. 
+ // + // Notes:- + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. + // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. + // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_enty. + // + // Rmethod: Method* + + // Rsender: senderSP must preserve for slow path, set SP to it on fast path (Rsender) + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + if (UseG1GC) { + Label slow_path; + + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ld_d(V0, SP, 0); + + __ beq(V0, R0, slow_path); + + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + + // Load the value of the referent field. + const Address field_address(V0, referent_offset); + __ load_heap_oop(V0, field_address); + + __ push(RA); + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + __ g1_write_barrier_pre(noreg /* obj */, + V0 /* pre_val */, + TREG /* thread */, + Rmethod /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ pop(RA); + + __ add_d(SP, Rsender, R0); // set sp to sender sp + __ jr(RA); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + (void) generate_normal_entry(false); + + return entry; + } +#endif // INCLUDE_ALL_GCS + + // If G1 is not enabled then attempt to go through the accessor entry point + // Reference.get is an accessor + return generate_accessor_entry(); +} + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. +address InterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls; + // Rsender: sender's sp + // Rmethod: Method* + address entry_point = __ pc(); + +#ifndef CORE + const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset())); +#endif + // get parameter size (always needed) + // the size in the java stack + __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); + __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); + + // native calls don't need the stack size check since they have no expression stack + // and the arguments are already on the stack and we only add a handful of words + // to the stack + + // Rmethod: Method* + // V0: size of parameters + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... 
+ // [ argument word 0 ] + + // for natives the size of locals is zero + + // compute beginning of parameters (S7) + __ slli_d(LVP, V0, Address::times_8); + __ addi_d(LVP, LVP, (-1) * wordSize); + __ add_d(LVP, LVP, SP); + + + // add 2 zero-initialized slots for native calls + // 1 slot for native oop temp offset (setup via runtime) + // 1 slot for static native result handler3 (setup via runtime) + __ push2(R0, R0); + + // Layout of frame at this point + // [ method holder mirror ] <--- sp + // [ result type info ] + // [ argument word n-1 ] <--- T0 + // ... + // [ argument word 0 ] <--- LVP + + +#ifndef CORE + if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count +#endif + + // initialize fixed part of activation frame + generate_fixed_frame(true); + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- sender's sp + // ... + // [ argument word 0 ] <--- S7 + + + // make sure method is native & not abstract +#ifdef ASSERT + __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(AT, T0, JVM_ACC_NATIVE); + __ bne(AT, R0, L); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(AT, T0, JVM_ACC_ABSTRACT); + __ beq(AT, R0, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. + Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ li(AT, (int)true); + __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; + __ bind(continue_after_compile); +#endif // CORE + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); + __ beq(AT, R0, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // after method_lock, the layout of frame is as following + // + // [ monitor entry ] <--- sp + // ... 
+ // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // start execution +#ifdef ASSERT + { + Label L; + __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ stop("broken stack frame setup in interpreter in asm"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + // work registers + const Register method = Rmethod; + //const Register thread = T2; + const Register t = T8; + + __ get_method(method); + __ verify_oop(method); + { + Label L, Lstatic; + __ ld_d(t,method,in_bytes(Method::const_offset())); + __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); + // LoongArch ABI: caller does not reserve space for the register auguments. + // A0 and A1(if needed) + __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, AT, JVM_ACC_STATIC); + __ beq(AT, R0, Lstatic); + __ addi_d(t, t, 1); + __ bind(Lstatic); + __ addi_d(t, t, -7); + __ bge(R0, t, L); + __ slli_d(t, t, Address::times_8); + __ sub_d(SP, SP, t); + __ bind(L); + } + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + __ move(AT, SP); + // [ ] <--- sp + // ... (size of parameters - 8 ) + // [ monitor entry ] + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + // get signature handler + { + Label L; + __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); + __ bne(T4, R0, L); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + // FIXME: when change codes in InterpreterRuntime, note this point + // from: begin of parameters + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); + // to: current sp + assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); + // temp: T3 + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); + + __ jalr(T4); + __ get_method(method); + + // + // if native function is static, and its second parameter has type length of double word, + // and first parameter has type length of word, we have to reserve one word + // for the first parameter, according to LoongArch abi. + // if native function is not static, and its third parameter has type length of double word, + // and second parameter has type length of word, we have to reserve one word for the second + // parameter. 
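The ABI note above is the only LoongArch-specific wrinkle in this argument shuffle: the first eight integer arguments travel in A0..A7 and only the overflow needs caller stack space. A minimal stand-alone sketch of that bookkeeping (function and parameter names are illustrative, not HotSpot API):

// Sketch only, assuming the standard LoongArch64 integer calling convention:
// A0..A7 carry the first eight arguments, anything beyond that spills to the
// stack, and the caller reserves no shadow space for register arguments.
static int native_outgoing_stack_words(int java_param_words, bool is_static) {
  const int arg_regs = 8;                    // A0..A7
  int total = 1                              // JNIEnv*, always passed in A0
            + (is_static ? 1 : 0)            // mirror handle for static methods
            + java_param_words;              // receiver (if any) + Java arguments
  int spilled = total - arg_regs;
  return spilled > 0 ? spilled : 0;          // words the wrapper allocates below SP
}

The generated code above arrives at the same count by loading size_of_parameters, adding one in the static case, and subtracting seven (JNIEnv* consumes one of the eight registers) before rounding SP down to StackAlignmentInBytes.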
+ // + + + // result handler is in V0 + // set result handler + __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); + +#define FIRSTPARA_SHIFT_COUNT 5 +#define SECONDPARA_SHIFT_COUNT 9 +#define THIRDPARA_SHIFT_COUNT 13 +#define PARA_MASK 0xf + + // pass mirror handle if static call + { + Label L; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld_w(t, method, in_bytes(Method::access_flags_offset())); + __ andi(AT, t, JVM_ACC_STATIC); + __ beq(AT, R0, L); + + // get mirror + __ ld_d(t, method, in_bytes(Method:: const_offset())); + __ ld_d(t, t, in_bytes(ConstMethod::constants_offset())); //?? + __ ld_d(t, t, ConstantPool::pool_holder_offset_in_bytes()); + __ ld_d(t, t, mirror_offset); + // copy mirror into activation frame + //__ st_w(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + // pass handle to mirror + __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ move(A1, t); + __ bind(L); + } + + // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) + // [ ] | + // ... size of parameters(or +1) | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // get native function entry point + { Label L; + __ ld_d(T4, method, in_bytes(Method::native_function_offset())); + __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ bne(T6, T4, L); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ verify_oop(method); + __ ld_d(T4, method, in_bytes(Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + // native function in T4 +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); + __ move(A0, t); + // [ jni environment ] <--- sp + // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) + // [ ] | + // ... size of parameters | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // Set the last Java PC in the frame anchor to be the return address from + // the call to the native method: this will allow the debugger to + // generate an accurate stack trace. 
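Conceptually, the frame anchor filled in by set_last_Java_frame is just three thread-local words that let an external stack walker bridge the native call. A stand-alone sketch (struct and field names are illustrative, not the real JavaFrameAnchor):

#include <cstdint>

// Sketch only: while the thread runs native code its real SP/FP/PC mean
// nothing to a Java stack walker, so the walk starts from these values.
struct LastJavaFrameSketch {
  intptr_t* last_java_sp;   // interpreter SP at the point of the call
  intptr_t* last_java_fp;   // FP of the interpreter frame being left
  uintptr_t last_java_pc;   // address of the native_return label used below
};

static void record_last_java_frame(LastJavaFrameSketch* anchor,
                                   intptr_t* sp, intptr_t* fp, uintptr_t pc) {
  anchor->last_java_sp = sp;
  anchor->last_java_fp = fp;
  anchor->last_java_pc = pc;   // a debugger now resumes the trace at the call site
}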
+ Label native_return; + __ set_last_Java_frame(thread, SP, FP, native_return); + + // change thread state +#ifdef ASSERT + { + Label L; + __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ addi_d(t, t, (-1) * _thread_in_Java); + __ beq(t, R0, L); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif + + __ li(t, _thread_in_native); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + + // call native method + __ jalr(T4); + __ bind(native_return); + // result potentially in V0 or F0 + + + // via _last_native_pc and not via _last_jave_sp + // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. + // If the order changes or anything else is added to the stack the code in + // interpreter_frame_result will have to be changed. + //FIXME, should modify here + // save return value to keep the value from being destroyed by other calls + __ push(dtos); + __ push(ltos); + + // change thread state + __ get_thread(thread); + __ li(t, _thread_in_native_trans); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ membar(__ AnyAny); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. + __ serialize_memory(thread, A0); + } + } + + // check for safepoint operation in progress and/or pending suspend requests + { Label Continue; + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are + // preserved and correspond to the bcp/locals pointers. So we do a runtime call + // by hand. + // + Label L; + __ li(AT, SafepointSynchronize::address_of_state()); + __ ld_w(AT, AT, 0); + __ bne(AT, R0, L); + __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ bind(L); + __ move(A0, thread); + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), + relocInfo::runtime_call_type); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + //add for compressedoops + __ reinit_heapbase(); + __ bind(Continue); + } + + // change thread state + __ li(t, _thread_in_Java); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ reset_last_Java_frame(thread, true); + + // reset handle block + __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); + __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); + + // If result was an oop then unbox and save it in the frame + { + Label no_oop; + //FIXME, addi only support 12-bit imeditate + __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); + __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); + __ bne(AT, T0, no_oop); + __ pop(ltos); + // Unbox oop result, e.g. JNIHandles::resolve value. 
+ __ resolve_jobject(V0, thread, T4); + __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); + // keep stack depth as expected by pushing oop which will eventually be discarded + __ push(ltos); + __ bind(no_oop); + } + { + Label no_reguard; + __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ li(AT,(int) JavaThread::stack_guard_yellow_disabled); + __ bne(t, AT, no_reguard); + __ pushad(); + __ move(S5_heapbase, SP); + __ li(AT, -StackAlignmentInBytes); + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); + __ move(SP, S5_heapbase); + __ popad(); + //add for compressedoops + __ reinit_heapbase(); + __ bind(no_reguard); + } + // restore BCP to have legal interpreter frame, + // i.e., bci == 0 <=> BCP == code_base() + // Can't call_VM until bcp is within reasonable. + __ get_method(method); // method is junk from thread_in_native to now. + __ verify_oop(method); + __ ld_d(BCP, method, in_bytes(Method::const_offset())); + __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(t, R0, L); + // Note: At some point we may want to unify this with the code used in + // call_VM_base(); + // i.e., we should use the StubRoutines::forward_exception code. For now this + // doesn't work here because the sp is not correctly set at this point. + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // do unlocking if necessary + { + Label L; + __ ld_w(t, method, in_bytes(Method::access_flags_offset())); + __ andi(t, t, JVM_ACC_SYNCHRONIZED); + __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); + __ beq(t, R0, L); + // the code below should be shared with interpreter macro assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, + // since this is a synchronized method. However, need + // to check that the object has not been unlocked by + // an explicit monitorexit bytecode. + // address of first monitor + + __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ bne(t, R0, unlock); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(unlock); + __ unlock_object(c_rarg0); + } + __ bind(L); + } + + // jvmti/jvmpi support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). 
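One detail of the unlock block above is easy to miss: a synchronized native method owns the frame's first monitor slot, but user code may already have released it with an explicit monitorexit. A sketch of that decision (types and names are illustrative, not HotSpot's):

// Sketch only: a NULL obj field means the slot was already unlocked, so the
// interpreter must raise IllegalMonitorStateException instead of unlocking a
// monitor it no longer holds.
struct MonitorSlotSketch { void* obj; /* lock word omitted */ };

enum UnlockAction { UNLOCK_OBJECT, THROW_ILLEGAL_MONITOR_STATE };

static UnlockAction native_exit_unlock_action(const MonitorSlotSketch* slot) {
  return slot->obj != nullptr ? UNLOCK_OBJECT : THROW_ILLEGAL_MONITOR_STATE;
}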
+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + + // restore potential result in V0, + // call result handler to restore potential result in ST0 & handle result + + __ pop(ltos); + __ pop(dtos); + + __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); + __ jalr(t); + + + // remove activation + __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp + __ ld_d(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address + __ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp + __ jr(RA); + +#ifndef CORE + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + // entry_point is the beginning of this + // function and checks again for compiled code + } +#endif + return entry_point; +} + +// +// Generic interpreted method entry to (asm) interpreter +// +// Layout of frame just at the entry +// +// [ argument word n-1 ] <--- sp +// ... +// [ argument word 0 ] +// assume Method* in Rmethod before call this method. +// prerequisites to the generated stub : the callee Method* in Rmethod +// note you must save the caller bcp before call the generated stub +// +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls; + + // Rmethod: Method* + // Rsender: sender 's sp + address entry_point = __ pc(); + + const Address invocation_counter(Rmethod, + in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); + + // get parameter size (always needed) + __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod + __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); + + // Rmethod: Method* + // V0: size of parameters + // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i + // get size of locals in words to T2 + __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); + // T2 = no. of additional locals, locals include parameters + __ sub_d(T2, T2, V0); + + // see if we've got enough room on the stack for locals plus overhead. + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + generate_stack_overflow_check(); + // after this function, the layout of frame does not change + + // compute beginning of parameters (LVP) + __ slli_d(LVP, V0, LogBytesPerWord); + __ addi_d(LVP, LVP, (-1) * wordSize); + __ add_d(LVP, LVP, SP); + + // T2 - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ beq(T2, R0, exit); + + __ bind(loop); + __ addi_d(SP, SP, (-1) * wordSize); + __ addi_d(T2, T2, -1); // until everything initialized + __ st_d(R0, SP, 0); // initialize local variables + __ bne(T2, R0, loop); + + __ bind(exit); + } + + // + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argument word n-1 ] <--- T0? + // ... 
+ // [ argument word 0 ] <--- LVP + + // initialize fixed part of activation frame + + generate_fixed_frame(false); + + + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] <--- fp + // [ return address ] + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + + // make sure method is not native & not abstract +#ifdef ASSERT + __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(T2, AT, JVM_ACC_NATIVE); + __ beq(T2, R0, L); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(T2, AT, JVM_ACC_ABSTRACT); + __ beq(T2, R0, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. + +#ifndef OPT_THREAD + Register thread = T8; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + __ li(AT, (int)true); + __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + + // mdp : T8 + // tmp1: T4 + // tmp2: T2 + __ profile_parameters_type(T8, T4, T2); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + Label profile_method; + Label profile_method_continue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, + &profile_method, + &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + + Label continue_after_compile; + __ bind(continue_after_compile); + +#endif // CORE + + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + // + if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { Label L; + __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); + __ beq(T2, R0, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // layout of frame after lock_method + // [ monitor entry ] <--- sp + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... 
+ // [ argument word 0 ] <--- LVP + + + // start execution +#ifdef ASSERT + { + Label L; + __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ stop("broken stack frame setup in interpreter in native"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + __ dispatch_next(vtos); + + // invocation counter overflow + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter + __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ get_method(Rmethod); + __ b(profile_method_continue); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + } + + return entry_point; +} + +// Entry points +// +// Here we generate the various kind of entries into the interpreter. +// The two main entry type are generic bytecode methods and native +// call method. These both come in synchronized and non-synchronized +// versions but the frame layout they create is very similar. The +// other method entry types are really just special purpose entries +// that are really entry and interpretation all in one. These are for +// trivial methods like accessor, empty, or special math methods. +// +// When control flow reaches any of the entry types for the interpreter +// the following holds -> +// +// Arguments: +// +// Rmethod: Method* +// V0: receiver +// +// +// Stack layout immediately at entry +// +// [ parameter n-1 ] <--- sp +// ... +// [ parameter 0 ] +// [ expression stack ] (caller's java expression stack) + +// Assuming that we don't go to one of the trivial specialized entries +// the stack will look like below when we are ready to execute the +// first bytecode (or call the native routine). The register usage +// will be as the template based interpreter expects (see +// interpreter_loongarch_64.hpp). +// +// local variables follow incoming parameters immediately; i.e. +// the return address is moved to the end of the locals). +// +// [ monitor entry ] <--- sp +// ... +// [ monitor entry ] +// [ monitor block top ] ( the top monitor entry ) +// [ byte code pointer ] (if native, bcp = 0) +// [ constant pool cache ] +// [ Method* ] +// [ locals offset ] +// [ sender's sp ] +// [ sender's fp ] +// [ return address ] <--- fp +// [ local var m-1 ] +// ... +// [ local var 0 ] +// [ argumnet word n-1 ] <--- ( sender's sp ) +// ... 
+// [ argument word 0 ] <--- S7 + +address AbstractInterpreterGenerator::generate_method_entry( + AbstractInterpreter::MethodKind kind) { + // determine code generation flags + bool synchronized = false; + address entry_point = NULL; + switch (kind) { + case Interpreter::zerolocals : + break; + case Interpreter::zerolocals_synchronized: + synchronized = true; + break; + case Interpreter::native : + entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); + break; + case Interpreter::native_synchronized : + entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); + break; + case Interpreter::empty : + entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); + break; + case Interpreter::accessor : + entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); + break; + case Interpreter::abstract : + entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); + break; + + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : break; + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_sqrt : + entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; + case Interpreter::java_lang_ref_reference_get: + entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; + case Interpreter::java_util_zip_CRC32_update: + entry_point = ((InterpreterGenerator*)this)->generate_CRC32_update_entry(); break; + case Interpreter::java_util_zip_CRC32_updateBytes: // fall thru + case Interpreter::java_util_zip_CRC32_updateByteBuffer: + entry_point = ((InterpreterGenerator*)this)->generate_CRC32_updateBytes_entry(kind); break; + default: + fatal(err_msg("unexpected method kind: %d", kind)); + break; + } + if (entry_point) return entry_point; + + return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized); +} + +// These should never be compiled since the interpreter will prefer +// the compiled version to the intrinsic version. +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + switch (method_kind(m)) { + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : + return false; + default: + return true; + } +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved fp thru expr stack bottom). 
+ // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; + + const int stub_code = 6; // see generate_call_stub + // return overhead_size + method->max_locals() + method->max_stack() + stub_code; + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return overhead_size + method_stack + stub_code; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + // If interpreter_frame!=NULL, set up the method, locals, and monitors. + // The frame interpreter_frame, if not NULL, is guaranteed to be the + // right size, as determined by a previous call to this method. + // It is also guaranteed to be walkable even though it is in a skeletal state + + // fixed size of an interpreter frame: + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; + +#ifdef ASSERT + if (!EnableInvokeDynamic) { + // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? + // Probably, since deoptimization doesn't work yet. + assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); + } + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp+8 + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); + + //set last sp; + intptr_t* sp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(sp); + // All frames but the initial interpreter frame we fill in have a + // value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. 
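The correction described above (and performed just below) is plain pointer arithmetic over the locals that are not incoming parameters. A stand-alone sketch with illustrative names:

#include <cstdint>

// Sketch only: a skeletal interpreter frame's sender SP must skip the extra
// (non-parameter) locals, because the interpreter allocates those below the
// arguments the caller pushed.
static intptr_t* corrected_sender_sp(intptr_t* caller_sp,
                                     int max_locals,
                                     int size_of_parameters,
                                     int words_per_slot) {  // Interpreter::stackElementWords
  int extra_locals = (max_locals - size_of_parameters) * words_per_slot;
  return caller_sp + extra_locals;   // word-granular pointer arithmetic
}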
+ // + if (extra_locals != 0 && + interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. + __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); + + // V0: exception + // V1: return address/pc that threw exception + __ restore_bcp(); // BCP points to call/send + __ restore_locals(); + + //add for compressedoops + __ reinit_heapbase(); + // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + // expression stack is undefined here + // V0: exception + // BCP: exception bcp + __ verify_oop(V0); + + // expression stack must be empty before entering the VM in case of an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ move(A1, V0); + __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); + // V0: exception handler entry point + // V1: preserved exception oop + // S0: bcp for exception handler + __ push(V1); // push exception which is now the only value on the stack + __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) + + // If the exception is not handled in the current frame the frame is removed and + // the exception is rethrown (i.e. exception continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction which caused + // the exception and the expression stack is empty. Thus, for any VM calls + // at this point, GC will find a legal oop map (with empty expression stack). + + // In current activation + // V0: exception + // BCP: exception bcp + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition indicating that we are + // currently handling popframe, so that call_VMs that may happen later do not trigger new + // popframe handling cycles. +#ifndef OPT_THREAD + Register thread = T2; + __ get_thread(T2); +#else + Register thread = TREG; +#endif + __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + __ ori(T3, T3, JavaThread::popframe_processing_bit); + __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#ifndef CORE + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. 
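If the caller does turn out to be deoptimized, the code below hands Deoptimization::popframe_preserve_args the block of outgoing arguments computed from LVP. A stand-alone sketch of that address arithmetic (names are illustrative, slot size assumed to be one machine word):

#include <cstddef>
#include <cstdint>

// Sketch only: locals are addressed downward from LVP on this port, so the
// outgoing arguments occupy the size_of_parameters slots ending at LVP and
// start size_of_parameters * bytes_per_slot - wordSize below it.
struct PreservedArgsSketch {
  void*  start;           // lowest address of the arguments to save
  size_t size_in_bytes;   // size_of_parameters * Interpreter::stackElementSize
};

static PreservedArgsSketch popframe_args_to_preserve(intptr_t* lvp,
                                                     int size_of_parameters,
                                                     int bytes_per_slot) {
  size_t bytes = (size_t)size_of_parameters * (size_t)bytes_per_slot;
  char*  start = (char*)lvp - bytes + sizeof(intptr_t);
  PreservedArgsSketch r = { start, bytes };
  return r;
}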
+ Label caller_not_deoptimized; + __ ld_d(A0, FP, frame::return_addr_offset * wordSize); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); + __ bne(V0, R0, caller_not_deoptimized); + + // Compute size of arguments for saving when returning to deoptimized caller + __ get_method(A1); + __ verify_oop(A1); + __ ld_d(A1, A1, in_bytes(Method::const_offset())); + __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); + __ shl(A1, Interpreter::logStackElementSize); + __ restore_locals(); + __ sub_d(A2, LVP, A1); + __ addi_d(A2, A2, wordSize); + // Save these arguments +#ifndef OPT_THREAD + __ get_thread(A0); +#else + __ move(A0, TREG); +#endif + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); + + __ remove_activation(vtos, T4, false, false, false); + + // Inform deoptimization that it is responsible for restoring these arguments +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); + __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + // Continue in deoptimization handler + __ jr(T4); + + __ bind(caller_not_deoptimized); + } +#endif /* !CORE */ + + __ remove_activation(vtos, T3, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Clear the popframe condition flag + // Finish with popframe handling + // A previous I2C followed by a deoptimization might have moved the + // outgoing arguments further up the stack. PopFrame expects the + // mutations to those outgoing arguments to be preserved and other + // constraints basically require this frame to look exactly as + // though it had previously invoked an interpreted activation with + // no space between the top of the expression stack (current + // last_sp) and the top of stack. Rather than force deopt to + // maintain this kind of invariant all the time we call a small + // fixup routine to move the mutated arguments onto the top of our + // expression stack if necessary. + __ move(T8, SP); + __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // PC must point into interpreter here + Label L; + __ bind(L); + __ set_last_Java_frame(thread, noreg, FP, L); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); + __ reset_last_Java_frame(thread, true); + // Restore the last_sp and null it out + __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + + + __ li(AT, JavaThread::popframe_inactive); + __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + + // Finish with popframe handling + __ restore_bcp(); + __ restore_locals(); +#ifndef CORE + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. 
+ if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } +#endif // !CORE + // Clear the popframe condition flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ li(AT, JavaThread::popframe_inactive); + __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#if INCLUDE_JVMTI + { + Label L_done; + + __ ld_bu(AT, BCP, 0); + __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); + __ bne(AT, R0, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ get_method(T4); + __ ld_d(T8, LVP, 0); + __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); + + __ beq(T8, R0, L_done); + + __ st_d(T8, SP, 0); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence + __ pop(T0); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, T3, false, true, false); + // restore exception + __ get_vm_result(T0, thread); + __ verify_oop(T0); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects + // the following registers set up: + // + // T0: exception + // T1: return address/pc that threw exception + // SP: expression stack of caller + // FP: fp of caller + __ push2(T0, T3); // save exception and return address + __ move(A1, T3); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T4, V0); // save exception handler + __ pop2(V0, V1); // restore return address and exception + + // Note that an "issuing PC" is actually the next PC after the call + __ jr(T4); // jump to exception handler of caller +} + + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ load_earlyret_value(state); + +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ ld_ptr(T4, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); + // Clear the earlyret state + __ li(AT, JvmtiThreadState::earlyret_inactive); + __ st_w(AT, cond_addr); + __ membar(__ AnyAny);//no membar here for aarch64 + + + __ remove_activation(state, T0, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ membar(__ AnyAny); + __ jr(T0); + + return entry; +} // end of ForceEarlyReturn support + + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + fep = __ pc(); __ push(ftos); __ b(L); + dep = __ pc(); __ push(dtos); __ b(L); + lep = __ pc(); __ 
push(ltos); __ b(L); + aep =__ pc(); __ push(atos); __ b(L); + bep = cep = sep = + iep = __ pc(); __ push(itos); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + + +//----------------------------------------------------------------------------- +// Generation of individual instructions + +// helpers for generate_and_dispatch + + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : TemplateInterpreterGenerator(code) { + generate_all(); // down here so it can be "virtual" +} + +//----------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + // prepare expression stack + __ push(state); // save tosca + + // tos & tos2 + // trace_bytecode need actually 4 args, the last two is tos&tos2 + // this work fine for x86. but LA ABI calling convention will store A2-A3 + // to the stack position it think is the tos&tos2 + // when the expression stack have no more than 2 data, error occur. + __ ld_d(A2, SP, 0); + __ ld_d(A3, SP, 1 * wordSize); + + // pass arguments & call tracer + __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), RA, A2, A3); + __ move(RA, V0); // make sure return address is not destroyed by pop(state) + + // restore expression stack + __ pop(state); // restore tosca + + // return + __ jr(RA); + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() { + __ li(T8, (long)&BytecodeCounter::_counter_value); + __ ld_w(AT, T8, 0); + __ addi_d(AT, AT, 1); + __ st_w(AT, T8, 0); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { + __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); + __ ld_w(AT, T8, 0); + __ addi_d(AT, AT, 1); + __ st_w(AT, T8, 0); +} + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { + __ li(T8, (long)&BytecodePairHistogram::_index); + __ ld_w(T4, T8, 0); + __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); + __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); + __ orr(T4, T4, T8); + __ li(T8, (long)&BytecodePairHistogram::_index); + __ st_w(T4, T8, 0); + __ slli_d(T4, T4, 2); + __ li(T8, (long)BytecodePairHistogram::_counters); + __ add_d(T8, T8, T4); + __ ld_w(AT, T8, 0); + __ addi_d(AT, AT, 1); + __ st_w(AT, T8, 0); +} + + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. + address entry = Interpreter::trace_code(t->tos_in()); + assert(entry != NULL, "entry must have been generated"); + __ call(entry, relocInfo::none); + //add for compressedoops + __ reinit_heapbase(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + __ li(T8, long(&BytecodeCounter::_counter_value)); + __ ld_w(T8, T8, 0); + __ li(AT, StopInterpreterAt); + __ bne(T8, AT, L); + __ brk(5); + __ bind(L); +} +#endif // !PRODUCT +#endif // ! CC_INTERP diff --git a/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp new file mode 100644 index 00000000000..228217f0017 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp @@ -0,0 +1,4024 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.inline.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "utilities/macros.hpp" + + +#ifndef CC_INTERP + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { + // No LoongArch specific initialization +} + +// Address computation: local variables + +static inline Address iaddress(int n) { + return Address(LVP, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} +static inline Address haddress(int n) { return iaddress(n + 0); } + + +static inline Address at_sp() { return Address(SP, 0); } +static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } +static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } + +// At top of Java expression stack which may be different than sp(). +// It isn't for category 1 objects. 
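Before the expression-stack helpers that follow, it is worth spelling out the arithmetic behind the local-variable helpers above: on this port locals are laid out downward from LVP, one word per slot, which is why laddress(n) is defined as iaddress(n + 1). A stand-alone sketch (assumed 8-byte slots, illustrative names):

#include <cstdint>

// Sketch only: slot n lives n words below LVP. A two-slot value (long or
// double) occupying slots n and n+1 is addressed through its higher-numbered,
// lower-address slot.
static intptr_t* local_slot(intptr_t* lvp, int n)     { return lvp - n; }
static intptr_t* two_slot_value(intptr_t* lvp, int n) { return local_slot(lvp, n + 1); }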
+static inline Address at_tos () { + Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); + return tos; +} + +static inline Address at_tos_p1() { + return Address(SP, Interpreter::expr_offset_in_bytes(1)); +} + +static inline Address at_tos_p2() { + return Address(SP, Interpreter::expr_offset_in_bytes(2)); +} + +static inline Address at_tos_p3() { + return Address(SP, Interpreter::expr_offset_in_bytes(3)); +} + +// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(BCP, offset); +} + +// Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == FSR, "parameter is just for looks"); + switch (barrier) { +#if INCLUDE_ALL_GCS + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != T3) { + __ move(T3, obj.base()); + } + } else { + __ lea(T3, obj); + } + __ g1_write_barrier_pre(T3 /* obj */, + T1 /* pre_val */, + TREG /* thread */, + T4 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + if (val == noreg) { + __ store_heap_oop_null(Address(T3, 0)); + } else { + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; + if (UseCompressedOops) { + new_val = T1; + __ move(new_val, val); + } + __ store_heap_oop(Address(T3, 0), val); + __ g1_write_barrier_post(T3 /* store_adr */, + new_val /* new_val */, + TREG /* thread */, + T4 /* tmp */, + T1 /* tmp2 */); + } + } + break; +#endif // INCLUDE_ALL_GCS + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ store_heap_oop_null(obj); + } else { + __ store_heap_oop(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + //TODO: LA + __ lea(T4, obj); + __ store_check(T4); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ store_heap_oop_null(obj); + } else { + __ store_heap_oop(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} + +// bytecode folding +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) { + if (!RewriteBytecodes) return; + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. 
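A stand-alone sketch of the decision that comment describes (illustrative names rather than HotSpot types):

// Sketch only: the generic bytecode at bcp[0] is replaced by its fast variant
// only once the constant-pool-cache entry carries a non-zero put_code, i.e.
// only after InterpreterRuntime::resolve_get_put has run; until then the slow
// bytecode stays in place so resolution keeps being triggered.
static void maybe_quicken_putfield(unsigned char* bcp,
                                   unsigned char fast_variant,
                                   unsigned char cached_put_code) {
  if (cached_put_code == 0) return;   // unresolved: leave the bytecode stream alone
  bcp[0] = fast_variant;              // the st_b(bc_reg, at_bcp(0)) before L_patch_done
}

The breakpoint check further down takes the runtime path (set_original_bytecode_at) instead, so an installed _breakpoint byte is never overwritten directly.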
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); + __ addi_d(bc_reg, R0, bc); + __ beq(tmp_reg, R0, L_patch_done); + } + break; + default: + assert(byte_no == -1, "sanity"); + // the pair bytecodes have already done the load. + if (load_bc_into_bc_reg) { + __ li(bc_reg, bc); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + // if a breakpoint is present we can't rewrite the stream directly + __ ld_bu(tmp_reg, at_bcp(0)); + __ li(AT, Bytecodes::_breakpoint); + __ bne(tmp_reg, AT, L_fast_patch); + + __ get_method(tmp_reg); + // Let breakpoint table handling rewrite to quicker bytecode + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); + + __ b(L_patch_done); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Label L_okay; + __ ld_bu(tmp_reg, at_bcp(0)); + __ li(AT, (int)Bytecodes::java_code(bc)); + __ beq(tmp_reg, AT, L_okay); + __ beq(tmp_reg, bc_reg, L_patch_done); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // patch bytecode + __ st_b(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} + + +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() { + transition(vtos, atos); + __ move(FSR, R0); +} + +void TemplateTable::iconst(int value) { + transition(vtos, itos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ li(FSR, value); + } +} + +void TemplateTable::lconst(int value) { + transition(vtos, ltos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ li(FSR, value); + } +} + +void TemplateTable::fconst(int value) { + transition(vtos, ftos); + switch( value ) { + case 0: __ movgr2fr_w(FSF, R0); return; + case 1: __ addi_d(AT, R0, 1); break; + case 2: __ addi_d(AT, R0, 2); break; + default: ShouldNotReachHere(); + } + __ movgr2fr_w(FSF, AT); + __ ffint_s_w(FSF, FSF); +} + +void TemplateTable::dconst(int value) { + transition(vtos, dtos); + switch( value ) { + case 0: __ movgr2fr_d(FSF, R0); + return; + case 1: __ addi_d(AT, R0, 1); + __ movgr2fr_d(FSF, AT); + __ ffint_d_w(FSF, FSF); + break; + default: ShouldNotReachHere(); + } +} + +void TemplateTable::bipush() { + transition(vtos, itos); + __ ld_b(FSR, at_bcp(1)); +} + +void TemplateTable::sipush() { + transition(vtos, itos); + __ ld_b(FSR, BCP, 1); + __ ld_bu(AT, BCP, 2); + __ slli_d(FSR, FSR, 8); + __ orr(FSR, FSR, AT); +} + +// T1 : tags +// T2 : index +// T3 : cpool +// T8 : tag +void TemplateTable::ldc(bool wide) { + transition(vtos, vtos); + Label call_ldc, notFloat, notClass, Done; + // get index in cpool + if (wide) { + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + } else { + __ ld_bu(T2, at_bcp(1)); + } + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + __ add_d(AT, T1, T2); + __ ld_b(T1, AT, tags_offset); + if(os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + } + //now T1 is the tag + + // unresolved class - get the resolved class + __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); + __ beq(AT, R0, call_ldc); + + // unresolved class in error (resolution failed) - call into 
runtime + // so that the same error from first resolution attempt is thrown. + __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); + __ beq(AT, R0, call_ldc); + + // resolved class - need to call vm to get java mirror of the class + __ addi_d(AT, T1, - JVM_CONSTANT_Class); + __ slli_d(T2, T2, Address::times_8); + __ bne(AT, R0, notClass); + + __ bind(call_ldc); + __ li(A1, wide); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); + //__ push(atos); + __ addi_d(SP, SP, - Interpreter::stackElementSize); + __ st_d(FSR, SP, 0); + __ b(Done); + + __ bind(notClass); + __ addi_d(AT, T1, -JVM_CONSTANT_Float); + __ bne(AT, R0, notFloat); + // ftos + __ add_d(AT, T3, T2); + __ fld_s(FSF, AT, base_offset); + //__ push_f(); + __ addi_d(SP, SP, - Interpreter::stackElementSize); + __ fst_s(FSF, SP, 0); + __ b(Done); + + __ bind(notFloat); +#ifdef ASSERT + { + Label L; + __ addi_d(AT, T1, -JVM_CONSTANT_Integer); + __ beq(AT, R0, L); + __ stop("unexpected tag type in ldc"); + __ bind(L); + } +#endif + // itos JVM_CONSTANT_Integer only + __ add_d(T0, T3, T2); + __ ld_w(FSR, T0, base_offset); + __ push(itos); + __ bind(Done); +} + +// Fast path for caching oop constants. +void TemplateTable::fast_aldc(bool wide) { + transition(vtos, atos); + + Register result = FSR; + Register tmp = SSR; + int index_size = wide ? sizeof(u2) : sizeof(u1); + + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (String, MethodType, etc.) + assert_different_registers(result, tmp); + __ get_cache_index_at_bcp(tmp, 1, index_size); + __ load_resolved_reference_at_index(result, tmp); + __ bne(result, R0, resolved); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + // first time invocation - must resolve first + int i = (int)bytecode(); + __ li(tmp, i); + __ call_VM(result, entry, tmp); + + __ bind(resolved); + + if (VerifyOops) { + __ verify_oop(result); + } +} + + +// used register: T2, T3, T1 +// T2 : index +// T3 : cpool +// T1 : tag +void TemplateTable::ldc2_w() { + transition(vtos, vtos); + Label Long, Done; + + // get index in cpool + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type in T1 + __ add_d(AT, T1, T2); + __ ld_b(T1, AT, tags_offset); + + __ addi_d(AT, T1, - JVM_CONSTANT_Double); + __ slli_d(T2, T2, Address::times_8); + __ bne(AT, R0, Long); + + // dtos + __ add_d(AT, T3, T2); + __ fld_d(FSF, AT, base_offset); + __ push(dtos); + __ b(Done); + + // ltos + __ bind(Long); + __ add_d(AT, T3, T2); + __ ld_d(FSR, AT, base_offset); + __ push(ltos); + + __ bind(Done); +} + +// we compute the actual local variable address here +void TemplateTable::locals_index(Register reg, int offset) { + __ ld_bu(reg, at_bcp(offset)); + __ slli_d(reg, reg, Address::times_8); + __ sub_d(reg, LVP, reg); +} + +// this method will do bytecode folding of the two form: +// iload iload iload caload +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::iload() { + transition(vtos, itos); + if (RewriteFrequentPairs) { + Label rewrite, done; + // get the next bytecode in T2 + __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. 
Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ li(AT, Bytecodes::_iload); + __ beq(AT, T2, done); + + __ li(T3, Bytecodes::_fast_iload2); + __ li(AT, Bytecodes::_fast_iload); + __ beq(AT, T2, rewrite); + + // if _caload, rewrite to fast_icaload + __ li(T3, Bytecodes::_fast_icaload); + __ li(AT, Bytecodes::_caload); + __ beq(AT, T2, rewrite); + + // rewrite so iload doesn't check again. + __ li(T3, Bytecodes::_fast_iload); + + // rewrite + // T3 : fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, T3, T2, false); + __ bind(done); + } + + // Get the local value into tos + locals_index(T2); + __ ld_w(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload2() { + transition(vtos, itos); + locals_index(T2); + __ ld_w(FSR, T2, 0); + __ push(itos); + locals_index(T2, 3); + __ ld_w(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload() { + transition(vtos, itos); + locals_index(T2); + __ ld_w(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::lload() { + transition(vtos, ltos); + locals_index(T2); + __ ld_d(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::fload() { + transition(vtos, ftos); + locals_index(T2); + __ fld_s(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::dload() { + transition(vtos, dtos); + locals_index(T2); + __ fld_d(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::aload() { + transition(vtos, atos); + locals_index(T2); + __ ld_d(FSR, T2, 0); +} + +void TemplateTable::locals_index_wide(Register reg) { + __ get_unsigned_2_byte_index_at_bcp(reg, 2); + __ slli_d(reg, reg, Address::times_8); + __ sub_d(reg, LVP, reg); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(T2); + __ ld_d(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_lload() { + transition(vtos, ltos); + locals_index_wide(T2); + __ ld_d(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_fload() { + transition(vtos, ftos); + locals_index_wide(T2); + __ fld_s(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_dload() { + transition(vtos, dtos); + locals_index_wide(T2); + __ fld_d(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_aload() { + transition(vtos, atos); + locals_index_wide(T2); + __ ld_d(FSR, T2, 0); +} + +// we use A2 as the regiser for index, BE CAREFUL! 
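+// (index_check_without_pop below moves the offending index into A2 before
+// jumping to Interpreter::_throw_ArrayIndexOutOfBoundsException_entry,
+// which expects it there.)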
+// we dont use our tge 29 now, for later optimization +void TemplateTable::index_check(Register array, Register index) { + // Pop ptr into array + __ pop_ptr(array); + index_check_without_pop(array, index); +} + +void TemplateTable::index_check_without_pop(Register array, Register index) { + // destroys A2 + // check array + __ null_check(array, arrayOopDesc::length_offset_in_bytes()); + + // sign extend since tos (index) might contain garbage in upper bits + __ slli_w(index, index, 0); + + // check index + Label ok; + __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); + __ bltu(index, AT, ok); + + //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 + if (A2 != index) __ move(A2, index); + __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ bind(ok); +} + +void TemplateTable::iaload() { + transition(itos, itos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, 1); + __ ld_w(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); +} + +void TemplateTable::laload() { + transition(itos, ltos); + index_check(SSR, FSR); + __ alsl_d(AT, FSR, SSR, Address::times_8 - 1); + __ ld_d(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG)); +} + +void TemplateTable::faload() { + transition(itos, ftos); + index_check(SSR, FSR); + __ shl(FSR, 2); + __ add_d(FSR, SSR, FSR); + __ fld_s(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); +} + +void TemplateTable::daload() { + transition(itos, dtos); + index_check(SSR, FSR); + __ alsl_d(AT, FSR, SSR, 2); + __ fld_d(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); +} + +void TemplateTable::aaload() { + transition(itos, atos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ? Address::times_4 : Address::times_8) - 1); + //add for compressedoops + __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +void TemplateTable::baload() { + transition(itos, itos); + index_check(SSR, FSR); + __ add_d(FSR, SSR, FSR); + __ ld_b(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); +} + +void TemplateTable::caload() { + transition(itos, itos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); + __ ld_hu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + +// iload followed by caload frequent pair +// used register : T2 +// T2 : index +void TemplateTable::fast_icaload() { + transition(vtos, itos); + // load index out of locals + locals_index(T2); + __ ld_w(FSR, T2, 0); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, 0); + __ ld_hu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + +void TemplateTable::saload() { + transition(itos, itos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); + __ ld_h(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); +} + +void TemplateTable::iload(int n) { + transition(vtos, itos); + __ ld_w(FSR, iaddress(n)); +} + +void TemplateTable::lload(int n) { + transition(vtos, ltos); + __ ld_d(FSR, laddress(n)); +} + +void TemplateTable::fload(int n) { + transition(vtos, ftos); + __ fld_s(FSF, faddress(n)); +} + +void TemplateTable::dload(int n) { + transition(vtos, dtos); + __ fld_d(FSF, laddress(n)); +} + +void TemplateTable::aload(int n) { + transition(vtos, atos); + __ ld_d(FSR, aaddress(n)); +} + +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::aload_0() { + transition(vtos, atos); + // According to bytecode histograms, the pairs: + // + // _aload_0, _fast_igetfield + // _aload_0, _fast_agetfield + // _aload_0, 
_fast_fgetfield + // + // occur frequently. If RewriteFrequentPairs is set, the (slow) + // _aload_0 bytecode checks if the next bytecode is either + // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then + // rewrites the current bytecode into a pair bytecode; otherwise it + // rewrites the current bytecode into _fast_aload_0 that doesn't do + // the pair check anymore. + // + // Note: If the next bytecode is _getfield, the rewrite must be + // delayed, otherwise we may miss an opportunity for a pair. + // + // Also rewrite frequent pairs + // aload_0, aload_1 + // aload_0, iload_1 + // These bytecodes with a small amount of code are most profitable + // to rewrite + if (RewriteFrequentPairs) { + Label rewrite, done; + // get the next bytecode in T2 + __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // do actual aload_0 + aload(0); + + // if _getfield then wait with rewrite + __ li(AT, Bytecodes::_getfield); + __ beq(AT, T2, done); + + // if _igetfield then reqrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_iaccess_0); + __ li(AT, Bytecodes::_fast_igetfield); + __ beq(AT, T2, rewrite); + + // if _agetfield then reqrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_aaccess_0); + __ li(AT, Bytecodes::_fast_agetfield); + __ beq(AT, T2, rewrite); + + // if _fgetfield then reqrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_faccess_0); + __ li(AT, Bytecodes::_fast_fgetfield); + __ beq(AT, T2, rewrite); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_aload_0); + + // rewrite + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, T3, T2, false); + + __ bind(done); + } else { + aload(0); + } +} + +void TemplateTable::istore() { + transition(itos, vtos); + locals_index(T2); + __ st_w(FSR, T2, 0); +} + +void TemplateTable::lstore() { + transition(ltos, vtos); + locals_index(T2); + __ st_d(FSR, T2, -wordSize); +} + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(T2); + __ fst_s(FSF, T2, 0); +} + +void TemplateTable::dstore() { + transition(dtos, vtos); + locals_index(T2); + __ fst_d(FSF, T2, -wordSize); +} + +void TemplateTable::astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index(T2); + __ st_d(FSR, T2, 0); +} + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(FSR); + locals_index_wide(T2); + __ st_d(FSR, T2, 0); +} + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(FSR); + locals_index_wide(T2); + __ st_d(FSR, T2, -wordSize); +} + +void TemplateTable::wide_fstore() { + wide_istore(); +} + +void TemplateTable::wide_dstore() { + wide_lstore(); +} + +void TemplateTable::wide_astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index_wide(T2); + __ st_d(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(SSR); // T2: array SSR: index + index_check(T2, SSR); // prefer index in SSR + __ slli_d(SSR, SSR, Address::times_4); + __ add_d(T2, T2, SSR); + __ st_w(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT)); +} + + + +// used register 
T2, T3 +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i (T2); + index_check(T3, T2); + __ slli_d(T2, T2, Address::times_8); + __ add_d(T3, T3, T2); + __ st_d(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG)); +} + +// used register T2 +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(SSR); + index_check(T2, SSR); + __ slli_d(SSR, SSR, Address::times_4); + __ add_d(T2, T2, SSR); + __ fst_s(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); +} + +// used register T2, T3 +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i (T2); + index_check(T3, T2); + __ slli_d(T2, T2, Address::times_8); + __ add_d(T3, T3, T2); + __ fst_d(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); +} + +// used register : T2, T3, T8 +// T2 : array +// T3 : subklass +// T8 : supklass +void TemplateTable::aastore() { + Label is_null, ok_is_subtype, done; + transition(vtos, vtos); + // stack: ..., array, index, value + __ ld_d(FSR, at_tos()); // Value + __ ld_w(SSR, at_tos_p1()); // Index + __ ld_d(T2, at_tos_p2()); // Array + + // index_check(T2, SSR); + index_check_without_pop(T2, SSR); + // do array store check - check for NULL value first + __ beq(FSR, R0, is_null); + + // Move subklass into T3 + //add for compressedoops + __ load_klass(T3, FSR); + // Move superklass into T8 + //add for compressedoops + __ load_klass(T8, T2); + __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); + // Compress array+index*4+12 into a single register. T2 + __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); + __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + // Generate subtype check. + // Superklass in T8. Subklass in T3. + __ gen_subtype_check(T8, T3, ok_is_subtype); + // Come here on failure + // object is at FSR + __ jmp(Interpreter::_throw_ArrayStoreException_entry); + // Come here on success + __ bind(ok_is_subtype); + do_oop_store(_masm, Address(T2, 0), FSR, _bs->kind(), true); + __ b(done); + + // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] + __ bind(is_null); + __ profile_null_seen(T4); + __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); + do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, _bs->kind(), true); + + __ bind(done); + __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); +} + +void TemplateTable::bastore() { + transition(itos, vtos); + __ pop_i(SSR); + index_check(T2, SSR); + + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. 
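+  // The boolean diffbit in the klass layout helper distinguishes boolean
+  // arrays from byte arrays; when it is set, the value is masked to 0 or 1
+  // before the store.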
+ __ load_klass(T4, T2); + __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); + + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ li(AT, diffbit); + + Label L_skip; + __ andr(AT, T4, AT); + __ beq(AT, R0, L_skip); + __ andi(FSR, FSR, 0x1); + __ bind(L_skip); + + __ add_d(SSR, T2, SSR); + __ st_b(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); +} + +void TemplateTable::castore() { + transition(itos, vtos); + __ pop_i(SSR); + index_check(T2, SSR); + __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); + __ st_h(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + +void TemplateTable::sastore() { + castore(); +} + +void TemplateTable::istore(int n) { + transition(itos, vtos); + __ st_w(FSR, iaddress(n)); +} + +void TemplateTable::lstore(int n) { + transition(ltos, vtos); + __ st_d(FSR, laddress(n)); +} + +void TemplateTable::fstore(int n) { + transition(ftos, vtos); + __ fst_s(FSF, faddress(n)); +} + +void TemplateTable::dstore(int n) { + transition(dtos, vtos); + __ fst_d(FSF, laddress(n)); +} + +void TemplateTable::astore(int n) { + transition(vtos, vtos); + __ pop_ptr(FSR); + __ st_d(FSR, aaddress(n)); +} + +void TemplateTable::pop() { + transition(vtos, vtos); + __ addi_d(SP, SP, Interpreter::stackElementSize); +} + +void TemplateTable::pop2() { + transition(vtos, vtos); + __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); +} + +void TemplateTable::dup() { + transition(vtos, vtos); + // stack: ..., a + __ load_ptr(0, FSR); + __ push_ptr(FSR); + // stack: ..., a, a +} + +// blows FSR +void TemplateTable::dup_x1() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(0, FSR); // load b + __ load_ptr(1, A5); // load a + __ store_ptr(1, FSR); // store b + __ store_ptr(0, A5); // store a + __ push_ptr(FSR); // push b + // stack: ..., b, a, b +} + +// blows FSR +void TemplateTable::dup_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, FSR); // load c + __ load_ptr(2, A5); // load a + __ store_ptr(2, FSR); // store c in a + __ push_ptr(FSR); // push c + // stack: ..., c, b, c, c + __ load_ptr(2, FSR); // load b + __ store_ptr(2, A5); // store a in b + // stack: ..., c, a, c, c + __ store_ptr(1, FSR); // store b in c + // stack: ..., c, a, b, c +} + +// blows FSR +void TemplateTable::dup2() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(1, FSR); // load a + __ push_ptr(FSR); // push a + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + // stack: ..., a, b, a, b +} + +// blows FSR +void TemplateTable::dup2_x1() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, T2); // load c + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + __ push_ptr(T2); // push c + // stack: ..., a, b, c, b, c + __ store_ptr(3, T2); // store c in b + // stack: ..., a, c, c, b, c + __ load_ptr(4, T2); // load a + __ store_ptr(2, T2); // store a in 2nd c + // stack: ..., a, c, a, b, c + __ store_ptr(4, FSR); // store b in a + // stack: ..., b, c, a, b, c + + // stack: ..., b, c, a, b, c +} + +// blows FSR, SSR +void TemplateTable::dup2_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c, d + // stack: ..., a, b, c, d + __ load_ptr(0, T2); // load d + __ load_ptr(1, FSR); // load c + __ push_ptr(FSR); // push c + __ push_ptr(T2); // push d + // stack: ..., a, b, c, d, c, d + __ load_ptr(4, FSR); // load b + __ store_ptr(2, FSR); // store b in d + __ store_ptr(4, T2); // store d in b + // stack: ..., a, d, c, b, c, d + __ load_ptr(5, T2); // load a + __ load_ptr(3, FSR); // load c + __ store_ptr(3, 
T2); // store a in c + __ store_ptr(5, FSR); // store c in a + // stack: ..., c, d, a, b, c, d + + // stack: ..., c, d, a, b, c, d +} + +// blows FSR +void TemplateTable::swap() { + transition(vtos, vtos); + // stack: ..., a, b + + __ load_ptr(1, A5); // load a + __ load_ptr(0, FSR); // load b + __ store_ptr(0, A5); // store a in b + __ store_ptr(1, FSR); // store b in a + + // stack: ..., b, a +} + +void TemplateTable::iop2(Operation op) { + transition(itos, itos); + + __ pop_i(SSR); + switch (op) { + case add : __ add_w(FSR, SSR, FSR); break; + case sub : __ sub_w(FSR, SSR, FSR); break; + case mul : __ mul_w(FSR, SSR, FSR); break; + case _and : __ andr(FSR, SSR, FSR); break; + case _or : __ orr(FSR, SSR, FSR); break; + case _xor : __ xorr(FSR, SSR, FSR); break; + case shl : __ sll_w(FSR, SSR, FSR); break; + case shr : __ sra_w(FSR, SSR, FSR); break; + case ushr : __ srl_w(FSR, SSR, FSR); break; + default : ShouldNotReachHere(); + } +} + +// the result stored in FSR, SSR, +// used registers : T2, T3 +void TemplateTable::lop2(Operation op) { + transition(ltos, ltos); + __ pop_l(T2); + + switch (op) { + case add : __ add_d(FSR, T2, FSR); break; + case sub : __ sub_d(FSR, T2, FSR); break; + case _and: __ andr(FSR, T2, FSR); break; + case _or : __ orr(FSR, T2, FSR); break; + case _xor: __ xorr(FSR, T2, FSR); break; + default : ShouldNotReachHere(); + } +} + +// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, +// the result is 0x80000000 +// the godson2 cpu do the same, so we need not handle this specially like x86 +void TemplateTable::idiv() { + transition(itos, itos); + Label not_zero; + + __ bne(FSR, R0, not_zero); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ bind(not_zero); + + __ pop_i(SSR); + __ div_w(FSR, SSR, FSR); +} + +void TemplateTable::irem() { + transition(itos, itos); + Label not_zero; + __ pop_i(SSR); + + __ bne(FSR, R0, not_zero); + //__ brk(7); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + + __ bind(not_zero); + __ mod_w(FSR, SSR, FSR); +} + +void TemplateTable::lmul() { + transition(ltos, ltos); + __ pop_l(T2); + __ mul_d(FSR, T2, FSR); +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::ldiv() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + + //__ brk(7); //generate FPE + __ jmp(Interpreter::_throw_ArithmeticException_entry); + + __ bind(normal); + __ pop_l(A2); + __ div_d(FSR, A2, FSR); +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::lrem() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + + __ jmp(Interpreter::_throw_ArithmeticException_entry); + + __ bind(normal); + __ pop_l (A2); + + __ mod_d(FSR, A2, FSR); +} + +// result in FSR +// used registers : T0 +void TemplateTable::lshl() { + transition(itos, ltos); + __ pop_l(T0); + __ sll_d(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lshr() { + transition(itos, ltos); + __ pop_l(T0); + __ sra_d(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lushr() { + transition(itos, ltos); + __ pop_l(T0); + __ srl_d(FSR, T0, FSR); +} + +// result in FSF +void TemplateTable::fop2(Operation op) { + transition(ftos, ftos); + switch (op) { + case add: + __ fld_s(fscratch, at_sp()); + __ fadd_s(FSF, fscratch, FSF); + break; + case sub: + __ fld_s(fscratch, at_sp()); + __ fsub_s(FSF, fscratch, FSF); + break; + case mul: + __ fld_s(fscratch, at_sp()); + __ fmul_s(FSF, fscratch, FSF); + break; + 
case div: + __ fld_s(fscratch, at_sp()); + __ fdiv_s(FSF, fscratch, FSF); + break; + case rem: + __ fmov_s(FA1, FSF); + __ fld_s(FA0, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); + break; + default : ShouldNotReachHere(); + } + + __ addi_d(SP, SP, 1 * wordSize); +} + +// result in SSF||FSF +// i dont handle the strict flags +void TemplateTable::dop2(Operation op) { + transition(dtos, dtos); + switch (op) { + case add: + __ fld_d(fscratch, at_sp()); + __ fadd_d(FSF, fscratch, FSF); + break; + case sub: + __ fld_d(fscratch, at_sp()); + __ fsub_d(FSF, fscratch, FSF); + break; + case mul: + __ fld_d(fscratch, at_sp()); + __ fmul_d(FSF, fscratch, FSF); + break; + case div: + __ fld_d(fscratch, at_sp()); + __ fdiv_d(FSF, fscratch, FSF); + break; + case rem: + __ fmov_d(FA1, FSF); + __ fld_d(FA0, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); + break; + default : ShouldNotReachHere(); + } + + __ addi_d(SP, SP, 2 * wordSize); +} + +void TemplateTable::ineg() { + transition(itos, itos); + __ sub_w(FSR, R0, FSR); +} + +void TemplateTable::lneg() { + transition(ltos, ltos); + __ sub_d(FSR, R0, FSR); +} + +void TemplateTable::fneg() { + transition(ftos, ftos); + __ fneg_s(FSF, FSF); +} + +void TemplateTable::dneg() { + transition(dtos, dtos); + __ fneg_d(FSF, FSF); +} + +// used registers : T2 +void TemplateTable::iinc() { + transition(vtos, vtos); + locals_index(T2); + __ ld_w(FSR, T2, 0); + __ ld_b(AT, at_bcp(2)); // get constant + __ add_d(FSR, FSR, AT); + __ st_w(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::wide_iinc() { + transition(vtos, vtos); + locals_index_wide(T2); + __ get_2_byte_integer_at_bcp(FSR, AT, 4); + __ hswap(FSR); + __ ld_w(AT, T2, 0); + __ add_d(FSR, AT, FSR); + __ st_w(FSR, T2, 0); +} + +void TemplateTable::convert() { + // Checking +#ifdef ASSERT + { + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + } +#endif // ASSERT + // Conversion + switch (bytecode()) { + case Bytecodes::_i2l: + __ slli_w(FSR, FSR, 0); + break; + case Bytecodes::_i2f: + __ movgr2fr_w(FSF, FSR); + __ ffint_s_w(FSF, FSF); + break; + case 
Bytecodes::_i2d: + __ movgr2fr_w(FSF, FSR); + __ ffint_d_w(FSF, FSF); + break; + case Bytecodes::_i2b: + __ ext_w_b(FSR, FSR); + break; + case Bytecodes::_i2c: + __ bstrpick_d(FSR, FSR, 15, 0); // truncate upper 56 bits + break; + case Bytecodes::_i2s: + __ ext_w_h(FSR, FSR); + break; + case Bytecodes::_l2i: + __ slli_w(FSR, FSR, 0); + break; + case Bytecodes::_l2f: + __ movgr2fr_d(FSF, FSR); + __ ffint_s_l(FSF, FSF); + break; + case Bytecodes::_l2d: + __ movgr2fr_d(FSF, FSR); + __ ffint_d_l(FSF, FSF); + break; + case Bytecodes::_f2i: + __ ftintrz_w_s(fscratch, FSF); + __ movfr2gr_s(FSR, fscratch); + break; + case Bytecodes::_f2l: + __ ftintrz_l_s(fscratch, FSF); + __ movfr2gr_d(FSR, fscratch); + break; + case Bytecodes::_f2d: + __ fcvt_d_s(FSF, FSF); + break; + case Bytecodes::_d2i: + __ ftintrz_w_d(fscratch, FSF); + __ movfr2gr_s(FSR, fscratch); + break; + case Bytecodes::_d2l: + __ ftintrz_l_d(fscratch, FSF); + __ movfr2gr_d(FSR, fscratch); + break; + case Bytecodes::_d2f: + __ fcvt_s_d(FSF, FSF); + break; + default : + ShouldNotReachHere(); + } +} + +void TemplateTable::lcmp() { + transition(ltos, itos); + + __ pop(T0); + __ pop(R0); + + __ slt(AT, T0, FSR); + __ slt(FSR, FSR, T0); + __ sub_d(FSR, FSR, AT); +} + +void TemplateTable::float_cmp(bool is_float, int unordered_result) { + if (is_float) { + __ fld_s(fscratch, at_sp()); + __ addi_d(SP, SP, 1 * wordSize); + + if (unordered_result < 0) { + __ fcmp_clt_s(FCC0, FSF, fscratch); + __ fcmp_cult_s(FCC1, fscratch, FSF); + } else { + __ fcmp_cult_s(FCC0, FSF, fscratch); + __ fcmp_clt_s(FCC1, fscratch, FSF); + } + } else { + __ fld_d(fscratch, at_sp()); + __ addi_d(SP, SP, 2 * wordSize); + + if (unordered_result < 0) { + __ fcmp_clt_d(FCC0, FSF, fscratch); + __ fcmp_cult_d(FCC1, fscratch, FSF); + } else { + __ fcmp_cult_d(FCC0, FSF, fscratch); + __ fcmp_clt_d(FCC1, fscratch, FSF); + } + } + + __ movcf2gr(FSR, FCC0); + __ movcf2gr(AT, FCC1); + __ sub_d(FSR, FSR, AT); +} + + +// used registers : T3, A7, Rnext +// FSR : return bci, this is defined by the vm specification +// T2 : MDO taken count +// T3 : method +// A7 : offset +// Rnext : next bytecode, this is required by dispatch_base +void TemplateTable::branch(bool is_jsr, bool is_wide) { + __ get_method(T3); + __ profile_taken_branch(A7, T2); // only C2 meaningful + + const ByteSize be_offset = MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset(); + const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset(); + + // Load up T4 with the branch displacement + if (!is_wide) { + __ ld_b(A7, BCP, 1); + __ ld_bu(AT, BCP, 2); + __ slli_d(A7, A7, 8); + __ orr(A7, A7, AT); + } else { + __ get_4_byte_integer_at_bcp(A7, 1); + __ swap(A7); + } + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the non-JSR + // normal-branch stuff occuring below. 
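+  // For jsr the return address is pushed as a bci (not an oop), BCP is
+  // advanced by the displacement and we dispatch immediately, skipping the
+  // branch profiling and OSR logic below.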
+ if (is_jsr) { + // Pre-load the next target bytecode into Rnext + __ ldx_bu(Rnext, BCP, A7); + + // compute return address as bci in FSR + __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); + __ ld_d(AT, T3, in_bytes(Method::const_offset())); + __ sub_d(FSR, FSR, AT); + // Adjust the bcp in BCP by the displacement in A7 + __ add_d(BCP, BCP, A7); + // jsr returns atos that is not an oop + // Push return address + __ push_i(FSR); + // jsr returns vtos + __ dispatch_only_noverify(vtos); + + return; + } + + // Normal (non-jsr) branch handling + + // Adjust the bcp in S0 by the displacement in T4 + __ add_d(BCP, BCP, A7); + + assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; + Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches + // T3: method + // T4: target offset + // BCP: target bcp + // LVP: locals pointer + __ blt(R0, A7, dispatch); // check if forward or backward branch + + // check if MethodCounters exists + Label has_counters; + __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ bne(AT, R0, has_counters); + __ push2(T3, A7); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), + T3); + __ pop2(T3, A7); + __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ beq(AT, R0, dispatch); + __ bind(has_counters); + + if (TieredCompilation) { + Label no_mdo; + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + if (ProfileInterpreter) { + // Are we profiling? + __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); + __ beq(T0, R0, no_mdo); + // Increment the MDO backedge counter + const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + __ beq(R0, R0, dispatch); + } + __ bind(no_mdo); + // Increment backedge counter in MethodCounters* + __ ld_d(T0, Address(T3, Method::method_counters_offset())); + __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + if (!UseOnStackReplacement) { + __ bind(backedge_counter_overflow); + } + } else { + // increment back edge counter + __ ld_d(T1, T3, in_bytes(Method::method_counters_offset())); + __ ld_w(T0, T1, in_bytes(be_offset)); + __ increment(T0, InvocationCounter::count_increment); + __ st_w(T0, T1, in_bytes(be_offset)); + + // load invocation counter + __ ld_w(T1, T1, in_bytes(inv_offset)); + // buffer bit added, mask no needed + + // dadd backedge counter & invocation counter + __ add_d(T1, T1, T0); + + if (ProfileInterpreter) { + // Test to see if we should create a method data oop + // T1 : backedge counter & invocation counter + if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { + __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); + __ bne(AT, R0, dispatch); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ ld_w(AT, AT, 0); + __ blt(T1, AT, dispatch); + } + + // if no method data exists, go to profile method + __ test_method_data_pointer(T1, profile_method); + + if (UseOnStackReplacement) { + if 
(Assembler::is_simm(InvocationCounter::InterpreterBackwardBranchLimit, 12)) { + __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); + __ bne(AT, R0, dispatch); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ ld_w(AT, AT, 0); + __ blt(T2, AT, dispatch); + } + + // When ProfileInterpreter is on, the backedge_count comes + // from the methodDataOop, which value does not get reset on + // the call to frequency_counter_overflow(). + // To avoid excessive calls to the overflow routine while + // the method is being compiled, dadd a second test to make + // sure the overflow function is called only once every + // overflow_frequency. + const int overflow_frequency = 1024; + __ andi(AT, T2, overflow_frequency-1); + __ beq(AT, R0, backedge_counter_overflow); + } + } else { + if (UseOnStackReplacement) { + // check for overflow against AT, which is the sum of the counters + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ ld_w(AT, AT, 0); + __ bge(T1, AT, backedge_counter_overflow); + } + } + } + __ bind(dispatch); + } + + // Pre-load the next target bytecode into Rnext + __ ld_bu(Rnext, BCP, 0); + + // continue with the bytecode @ target + // FSR: return bci for jsr's, unused otherwise + // Rnext: target bytecode + // BCP: target bcp + __ dispatch_only(vtos); + + if (UseLoopCounter) { + if (ProfileInterpreter) { + // Out-of-line code to allocate method data oop. + __ bind(profile_method); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ ld_bu(Rnext, BCP, 0); + __ set_method_data_pointer_for_bcp(); + __ b(dispatch); + } + + if (UseOnStackReplacement) { + // invocation counter overflow + __ bind(backedge_counter_overflow); + __ sub_d(A7, BCP, A7); // branch bcp + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), A7); + __ ld_bu(Rnext, BCP, 0); + + // V0: osr nmethod (osr ok) or NULL (osr not possible) + // V1: osr adapter frame return address + // Rnext: target bytecode + // LVP: locals pointer + // BCP: bcp + __ beq(V0, R0, dispatch); + // nmethod may have been invalidated (VM may block upon call_VM return) + __ ld_w(T3, V0, nmethod::entry_bci_offset()); + __ li(AT, InvalidOSREntryBci); + __ beq(AT, T3, dispatch); + // We need to prepare to execute the OSR method. First we must + // migrate the locals and monitors off of the stack. 
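+      // (SharedRuntime::OSR_migration_begin, called below, packs them into
+      // an OSR buffer; its address comes back in V0 and is moved to T0, the
+      // register the OSR entry expects; see the osrBufferPointer note below.)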
+ //V0: osr nmethod (osr ok) or NULL (osr not possible) + //V1: osr adapter frame return address + //Rnext: target bytecode + //LVP: locals pointer + //BCP: bcp + __ move(BCP, V0); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + + // V0 is OSR buffer, move it to expected parameter location + // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp + __ move(T0, V0); + + // pop the interpreter frame + __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); + // remove frame anchor + __ leave(); + __ move(LVP, RA); + __ move(SP, A7); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP , SP , AT); + + // push the (possibly adjusted) return address + // refer to osr_entry in c1_LIRAssembler_loongarch.cpp + __ ld_d(AT, BCP, nmethod::osr_entry_point_offset()); + __ jr(AT); + } + } +} + + +void TemplateTable::if_0cmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + case less: + __ bge(FSR, R0, not_taken); + break; + case less_equal: + __ blt(R0, FSR, not_taken); + break; + case greater: + __ bge(R0, FSR, not_taken); + break; + case greater_equal: + __ blt(FSR, R0, not_taken); + break; + } + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_icmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + + __ pop_i(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + case less: + __ bge(SSR, FSR, not_taken); + break; + case less_equal: + __ blt(FSR, SSR, not_taken); + break; + case greater: + __ bge(FSR, SSR, not_taken); + break; + case greater_equal: + __ blt(SSR, FSR, not_taken); + break; + } + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_nullcmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + default: + ShouldNotReachHere(); + } + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + + +void TemplateTable::if_acmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + // __ ld_w(SSR, SP, 0); + __ pop_ptr(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + default: + ShouldNotReachHere(); + } + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci +void TemplateTable::ret() { + transition(vtos, vtos); + + locals_index(T2); + __ ld_d(T2, T2, 0); + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld_d(BCP, T1, in_bytes(Method::const_offset())); + __ add_d(BCP, BCP, T2); + __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci 
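+// wide_ret differs from ret() only in that the local holding the return bci
+// is addressed with a two-byte (wide) index via locals_index_wide.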
+void TemplateTable::wide_ret() { + transition(vtos, vtos); + + locals_index_wide(T2); + __ ld_d(T2, T2, 0); // get return bci, compute return bcp + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld_d(BCP, T1, in_bytes(Method::const_offset())); + __ add_d(BCP, BCP, T2); + __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos); +} + +// used register T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : low +// A7 : high +// Rnext : dest bytecode, required by dispatch_base +void TemplateTable::tableswitch() { + Label default_case, continue_execution; + transition(itos, vtos); + + // align BCP + __ addi_d(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // load lo & hi + __ ld_w(T3, T2, 1 * BytesPerInt); + __ swap(T3); + __ ld_w(A7, T2, 2 * BytesPerInt); + __ swap(A7); + + // check against lo & hi + __ blt(FSR, T3, default_case); + __ blt(A7, FSR, default_case); + + // lookup dispatch offset, in A7 big endian + __ sub_d(FSR, FSR, T3); + __ alsl_d(AT, FSR, T2, Address::times_4 - 1); + __ ld_w(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(FSR, T4, T3); + + __ bind(continue_execution); + __ swap(A7); + __ add_d(BCP, BCP, A7); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos); + + // handle default + __ bind(default_case); + __ profile_switch_default(FSR); + __ ld_w(A7, T2, 0); + __ b(continue_execution); +} + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +// used registers : T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : pair index +// A7 : offset +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + + // swap FSR so we can avoid swapping the table entries + __ swap(FSR); + + // align BCP + __ addi_d(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // set counter + __ ld_w(T3, T2, BytesPerInt); + __ swap(T3); + __ b(loop_entry); + + // table search + __ bind(loop); + // get the entry value + __ alsl_d(AT, T3, T2, Address::times_8 - 1); + __ ld_w(AT, AT, 2 * BytesPerInt); + + // found? + __ beq(FSR, AT, found); + + __ bind(loop_entry); + Label L1; + __ bge(R0, T3, L1); + __ addi_d(T3, T3, -1); + __ b(loop); + __ bind(L1); + __ addi_d(T3, T3, -1); + + // default case + __ profile_switch_default(FSR); + __ ld_w(A7, T2, 0); + __ b(continue_execution); + + // entry found -> get offset + __ bind(found); + __ alsl_d(AT, T3, T2, Address::times_8 - 1); + __ ld_w(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(T3, FSR, T2); + + // continue execution + __ bind(continue_execution); + __ swap(A7); + __ add_d(BCP, BCP, A7); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos); +} + +// used registers : T0, T1, T2, T3, A7, Rnext +// T2 : pairs address(array) +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int n) { + // // Binary search according to "Methodik des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. 
+ // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (inexisting) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // register allocation + const Register array = T2; + const Register i = T3, j = A7; + const Register h = T1; + const Register temp = T0; + const Register key = FSR; + + // setup array + __ addi_d(array, BCP, 3*BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(array, array, AT); + + // initialize i & j + __ move(i, R0); + __ ld_w(j, array, - 1 * BytesPerInt); + // Convert j into native byteordering + __ swap(j); + + // and start + Label entry; + __ b(entry); + + // binary search loop + { + Label loop; + __ bind(loop); + // int h = (i + j) >> 1; + __ add_d(h, i, j); + __ srli_d(h, h, 1); + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // Convert array[h].match to native byte-ordering before compare + __ alsl_d(AT, h, array, Address::times_8 - 1); + __ ld_w(temp, AT, 0 * BytesPerInt); + __ swap(temp); + + __ slt(AT, key, temp); + __ maskeqz(i, i, AT); + __ masknez(temp, h, AT); + __ OR(i, i, temp); + __ masknez(j, j, AT); + __ maskeqz(temp, h, AT); + __ OR(j, j, temp); + + // while (i+1 < j) + __ bind(entry); + __ addi_d(h, i, 1); + __ blt(h, j, loop); + } + + // end of binary search, result index is i (must check again!) + Label default_case; + // Convert array[i].match to native byte-ordering before compare + __ alsl_d(AT, i, array, Address::times_8 - 1); + __ ld_w(temp, AT, 0 * BytesPerInt); + __ swap(temp); + __ bne(key, temp, default_case); + + // entry found -> j = offset + __ alsl_d(AT, i, array, Address::times_8 - 1); + __ ld_w(j, AT, 1 * BytesPerInt); + __ profile_switch_case(i, key, array); + __ swap(j); + + __ add_d(BCP, BCP, j); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos); + + // default case -> j = default offset + __ bind(default_case); + __ profile_switch_default(i); + __ ld_w(j, array, - 2 * BytesPerInt); + __ swap(j); + __ add_d(BCP, BCP, j); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos); +} + +void TemplateTable::_return(TosState state) { + transition(state, state); + assert(_desc->calls_vm(), + "inconsistent calls_vm information"); // call in remove_activation + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + __ ld_d(T1, aaddress(0)); + __ load_klass(LVP, T1); + __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset())); + __ li(AT, JVM_ACC_HAS_FINALIZER); + __ andr(AT, AT, LVP); + Label skip_register_finalizer; + __ beq(AT, R0, skip_register_finalizer); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::register_finalizer), T1); + __ bind(skip_register_finalizer); + } + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. 
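+  // (narrow() re-truncates FSR according to the method's declared return
+  // type, e.g. boolean, byte, char or short.)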
+ if (state == itos) { + __ narrow(FSR); + } + + __ remove_activation(state, T4); + __ membar(__ StoreStore); + + __ jr(T4); +} + +// we dont shift left 2 bits in get_cache_and_index_at_bcp +// for we always need shift the index we use it. the ConstantPoolCacheEntry +// is 16-byte long, index is the index in +// ConstantPoolCache, so cache + base_offset() + index * 16 is +// the corresponding ConstantPoolCacheEntry +// used registers : T2 +// NOTE : the returned index need also shift left 4 to get the address! +void TemplateTable::resolve_cache_and_index(int byte_no, + Register Rcache, + Register index, + size_t index_size) { + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + const Register temp = A1; + assert_different_registers(Rcache, index); + + Label resolved; + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); + // is resolved? + int i = (int)bytecode(); + __ addi_d(temp, temp, -i); + __ beq(temp, R0, resolved); + // resolve first time through + address entry; + switch (bytecode()) { + case Bytecodes::_getstatic : // fall through + case Bytecodes::_putstatic : // fall through + case Bytecodes::_getfield : // fall through + case Bytecodes::_putfield : + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); + break; + case Bytecodes::_invokevirtual : // fall through + case Bytecodes::_invokespecial : // fall through + case Bytecodes::_invokestatic : // fall through + case Bytecodes::_invokeinterface: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); + break; + case Bytecodes::_invokehandle: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); + break; + case Bytecodes::_invokedynamic: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); + break; + default : + fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); + break; + } + + __ li(temp, i); + __ call_VM(NOREG, entry, temp); + + // Update registers with resolved info + __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); + __ bind(resolved); +} + +// The Rcache and index registers must be set before call +void TemplateTable::load_field_cp_cache_entry(Register obj, + Register cache, + Register index, + Register off, + Register flags, + bool is_static = false) { + assert_different_registers(cache, index, flags, off); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + // Field offset + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); + // Flags + __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); + + // klass overwrite register + if (is_static) { + __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld_d(obj, Address(obj, mirror_offset)); + + __ verify_oop(obj); + } +} + +// get the method, itable_index and flags of the current invoke +void TemplateTable::load_invoke_cp_cache_entry(int byte_no, + Register method, + Register itable_index, + Register flags, + bool is_invokevirtual, + bool is_invokevfinal, /*unused*/ + bool is_invokedynamic) { + // setup registers + const Register cache = T3; + const Register index = T1; + assert_different_registers(method, flags); + assert_different_registers(method, cache, index); + assert_different_registers(itable_index, flags); + assert_different_registers(itable_index, cache, 
index); + assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); + // determine constant pool cache field offsets + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + ((byte_no == f2_byte) + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + // access constant pool cache fields + const int index_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()); + + size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_d(method, AT, method_offset); + + if (itable_index != NOREG) { + __ ld_d(itable_index, AT, index_offset); + } + __ ld_d(flags, AT, flags_offset); +} + +// The registers cache and index expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, Register index, + bool is_static, bool has_tos) { + // do the JVMTI work here to avoid disturbing the register state below + // We use c_rarg registers here because we want to use the register used in + // the call to the VM + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label L1; + // kill FSR + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + assert_different_registers(cache, index, AT); + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ ld_w(AT, AT, 0); + __ beq(AT, R0, L1); + + __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); + + // cache entry pointer + __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); + __ shl(tmp3, LogBytesPerWord); + __ add_d(tmp2, tmp2, tmp3); + if (is_static) { + __ move(tmp1, R0); + } else { + __ ld_d(tmp1, SP, 0); + __ verify_oop(tmp1); + } + // tmp1: object pointer or NULL + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::pop_and_check_object(Register r) { + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. 
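+  // (verify_oop is expected to expand to checking code only when VerifyOops
+  // is enabled.)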
+ __ verify_oop(r); +} + +// used registers : T1, T2, T3, T1 +// T1 : flags +// T2 : off +// T3 : obj +// T1 : field address +// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the +// following mapping to the TosState states: +// btos: 0 +// ctos: 1 +// stos: 2 +// itos: 3 +// ltos: 4 +// ftos: 5 +// dtos: 6 +// atos: 7 +// vtos: 8 +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::getfield_or_static(int byte_no, bool is_static) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + { + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + if (!is_static) pop_and_check_object(obj); + __ add_d(index, obj, off); + + + Label Done, notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + + // btos + __ ld_b(FSR, index, 0); + __ push(btos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + + __ bind(notByte); + __ li(AT, ztos); + __ bne(flags, AT, notBool); + + // ztos + __ ld_b(FSR, index, 0); + __ push(ztos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + + __ bind(notBool); + __ li(AT, itos); + __ bne(flags, AT, notInt); + + // itos + __ ld_w(FSR, index, 0); + __ push(itos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); + } + __ b(Done); + + __ bind(notInt); + __ li(AT, atos); + __ bne(flags, AT, notObj); + + // atos + //add for compressedoops + __ load_heap_oop(FSR, Address(index, 0)); + __ push(atos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); + } + __ b(Done); + + __ bind(notObj); + __ li(AT, ctos); + __ bne(flags, AT, notChar); + + // ctos + __ ld_hu(FSR, index, 0); + __ push(ctos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); + } + __ b(Done); + + __ bind(notChar); + __ li(AT, stos); + __ bne(flags, AT, notShort); + + // stos + __ ld_h(FSR, index, 0); + __ push(stos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); + } + __ b(Done); + + __ bind(notShort); + __ li(AT, ltos); + __ bne(flags, AT, notLong); + + // ltos + __ ld_d(FSR, index, 0 * wordSize); + __ push(ltos); + + // Don't rewrite to _fast_lgetfield for potential volatile case. 
+ __ b(Done); + + __ bind(notLong); + __ li(AT, ftos); + __ bne(flags, AT, notFloat); + + // ftos + __ fld_s(FSF, index, 0); + __ push(ftos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); + } + __ b(Done); + + __ bind(notFloat); + __ li(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); +#endif + + // dtos + __ fld_d(FSF, index, 0 * wordSize); + __ push(dtos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); + } + +#ifdef ASSERT + __ b(Done); + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} + + +void TemplateTable::getfield(int byte_no) { + getfield_or_static(byte_no, false); +} + +void TemplateTable::getstatic(int byte_no) { + getfield_or_static(byte_no, true); +} + +// The registers cache and index expected to be set before call. +// The function may destroy various registers, just not the cache and index registers. +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { + transition(vtos, vtos); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L1; + //kill AT, T1, T2, T3, T4 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T4; + assert_different_registers(cache, index, tmp4); + + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ ld_w(AT, AT, 0); + __ beq(AT, R0, L1); + + __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); + + if (is_static) { + __ move(tmp1, R0); + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though; it + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. 
+ Label two_word, valsize_known; + __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); + __ ld_d(tmp3, AT, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset())); + __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); + + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ move(tmp1, SP); + __ li(AT, ltos); + __ beq(tmp3, AT, two_word); + __ li(AT, dtos); + __ beq(tmp3, AT, two_word); + __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); + __ b(valsize_known); + + __ bind(two_word); + __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); + + __ bind(valsize_known); + // setup object pointer + __ ld_d(tmp1, tmp1, 0 * wordSize); + } + // cache entry pointer + __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); + __ shl(tmp4, LogBytesPerWord); + __ add_d(tmp2, tmp2, tmp4); + // object (tos) + __ move(tmp3, SP); + // tmp1: object pointer set up above (NULL if static) + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +// used registers : T0, T1, T2, T3, T8 +// T1 : flags +// T2 : off +// T3 : obj +// T8 : volatile bit +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::putfield_or_static(int byte_no, bool is_static) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + const Register bc = T3; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + Label Done; + { + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); + __ bind(notVolatile); + } + + Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + + // btos + __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + + __ pop(btos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ st_b(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ztos + __ bind(notByte); + __ li(AT, ztos); + __ bne(flags, AT, notBool); + + __ pop(ztos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ andi(FSR, FSR, 0x1); + __ st_b(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); + } + __ b(Done); + + // itos + __ bind(notBool); + __ li(AT, itos); + __ bne(flags, AT, notInt); + + __ pop(itos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ st_w(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); + } + __ b(Done); + + // atos + __ bind(notInt); + __ li(AT, atos); + __ bne(flags, AT, notObj); + + __ pop(atos); + if (!is_static) { + pop_and_check_object(obj); + } + + do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR, _bs->kind(), false); + + if (!is_static) { + 
patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ctos + __ bind(notObj); + __ li(AT, ctos); + __ bne(flags, AT, notChar); + + __ pop(ctos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ st_h(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); + } + __ b(Done); + + // stos + __ bind(notChar); + __ li(AT, stos); + __ bne(flags, AT, notShort); + + __ pop(stos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ st_h(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ltos + __ bind(notShort); + __ li(AT, ltos); + __ bne(flags, AT, notLong); + + __ pop(ltos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ st_d(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ftos + __ bind(notLong); + __ li(AT, ftos); + __ bne(flags, AT, notFloat); + + __ pop(ftos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ fst_s(FSF, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); + } + __ b(Done); + + + // dtos + __ bind(notFloat); + __ li(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); +#endif + + __ pop(dtos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ fst_d(FSF, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); + } + +#ifdef ASSERT + __ b(Done); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); + __ bind(notVolatile); + } +} + +void TemplateTable::putfield(int byte_no) { + putfield_or_static(byte_no, false); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +// used registers : T1, T2, T3 +// T1 : cp_entry +// T2 : obj +// T3 : value pointer +void TemplateTable::jvmti_post_fast_field_mod() { + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. 
+ Label L2; + //kill AT, T1, T2, T3, T4 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T4; + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ ld_w(tmp3, AT, 0); + __ beq(tmp3, R0, L2); + __ pop_ptr(tmp1); + __ verify_oop(tmp1); + __ push_ptr(tmp1); + switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ push_i(FSR); break; + case Bytecodes::_fast_dputfield: __ push_d(FSF); break; + case Bytecodes::_fast_fputfield: __ push_f(); break; + case Bytecodes::_fast_lputfield: __ push_l(FSR); break; + default: ShouldNotReachHere(); + } + __ move(tmp3, SP); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); + __ verify_oop(tmp1); + // tmp1: object pointer copied above + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + + switch (bytecode()) { // restore tos values + case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; + case Bytecodes::_fast_dputfield: __ pop_d(); break; + case Bytecodes::_fast_fputfield: __ pop_f(); break; + case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; + } + __ bind(L2); + } +} + +// used registers : T2, T3, T1 +// T2 : index & off & field address +// T3 : cache & obj +// T1 : flags +void TemplateTable::fast_storefield(TosState state) { + transition(state, vtos); + + const Register scratch = T8; + + ByteSize base = ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ membar(__ LoadLoad); + + // test for volatile with T1 + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); + + // replace index with field offset from cache entry + __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); + + Label Done; + { + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, T1); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); + __ bind(notVolatile); + } + + // Get object from stack + pop_and_check_object(T3); + + if (bytecode() != Bytecodes::_fast_aputfield) { + // field address + __ add_d(T2, T3, T2); + } + + // access field + switch (bytecode()) { + case Bytecodes::_fast_zputfield: + __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 + // fall through to bputfield + case Bytecodes::_fast_bputfield: + __ st_b(FSR, T2, 0); + break; + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: + __ st_h(FSR, T2, 0); + break; + case Bytecodes::_fast_iputfield: + __ st_w(FSR, T2, 0); + break; + case Bytecodes::_fast_lputfield: + __ st_d(FSR, T2, 0 * wordSize); + break; + case 
Bytecodes::_fast_fputfield: + __ fst_s(FSF, T2, 0); + break; + case Bytecodes::_fast_dputfield: + __ fst_d(FSF, T2, 0 * wordSize); + break; + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR, _bs->kind(), false); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); + __ bind(notVolatile); + } +} + +// used registers : T2, T3, T1 +// T3 : cp_entry & cache +// T2 : index & offset +void TemplateTable::fast_accessfield(TosState state) { + transition(atos, state); + + const Register scratch = T8; + + // do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we take + // the time to call into the VM. + Label L1; + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ ld_w(T3, AT, 0); + __ beq(T3, R0, L1); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(T3, T1, 1); + __ move(TSR, FSR); + __ verify_oop(FSR); + // FSR: object pointer copied above + // T3: cache entry pointer + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), + FSR, T3); + __ move(FSR, TSR); + __ bind(L1); + } + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ membar(__ LoadLoad); + + // replace index with field offset from cache entry + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + { + __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + // FSR: object + __ verify_oop(FSR); + __ null_check(FSR); + // field addresses + __ add_d(FSR, FSR, T2); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_bgetfield: + __ ld_b(FSR, FSR, 0); + break; + case Bytecodes::_fast_sgetfield: + __ ld_h(FSR, FSR, 0); + break; + case Bytecodes::_fast_cgetfield: + __ ld_hu(FSR, FSR, 0); + break; + case Bytecodes::_fast_igetfield: + __ ld_w(FSR, FSR, 0); + break; + case Bytecodes::_fast_lgetfield: + __ stop("should not be rewritten"); + break; + case Bytecodes::_fast_fgetfield: + __ fld_s(FSF, FSR, 0); + break; + case Bytecodes::_fast_dgetfield: + __ fld_d(FSF, FSR, 0); + break; + case Bytecodes::_fast_agetfield: + __ load_heap_oop(FSR, Address(FSR, 0)); + __ verify_oop(FSR); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} + +// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 +// used registers : T1, T2, T3, T1 +// T1 : obj & field address +// T2 : off +// T3 : cache +// T1 : index +void TemplateTable::fast_xaccess(TosState state) { + transition(vtos, state); + + const Register scratch = T8; + + // get receiver + __ ld_d(T1, aaddress(0)); + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 2); + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T2, AT, 
in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + { + __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ addi_d(BCP, BCP, 1); + __ null_check(T1); + __ add_d(T1, T1, T2); + + if (state == itos) { + __ ld_w(FSR, T1, 0); + } else if (state == atos) { + __ load_heap_oop(FSR, Address(T1, 0)); + __ verify_oop(FSR); + } else if (state == ftos) { + __ fld_s(FSF, T1, 0); + } else { + ShouldNotReachHere(); + } + __ addi_d(BCP, BCP, -1); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} + + + +//----------------------------------------------------------------------------- +// Calls + +void TemplateTable::count_calls(Register method, Register temp) { + // implemented elsewhere + ShouldNotReachHere(); +} + +// method, index, recv, flags: T1, T2, T3, T1 +// byte_no = 2 for _invokevirtual, 1 else +// T0 : return address +// get the method & index of the invoke, and push the return address of +// the invoke(first word in the frame) +// this address is where the return code jmp to. +// NOTE : this method will set T3&T1 as recv&flags +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. + Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) { + + + // determine flags + const Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + const bool save_flags = (flags != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); + assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); + assert(flags == noreg || flags == T1, "error flags reg."); + assert(recv == noreg || recv == T3, "error recv reg."); + + // setup registers & access constant pool cache + if(recv == noreg) recv = T3; + if(flags == noreg) flags = T1; + assert_different_registers(method, index, recv, flags); + + // save 'interpreter return address' + __ save_bcp(); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); + __ andr(AT, AT, flags); + __ beq(AT, R0, L_no_push); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. 
+    Register tmp = SSR;
+    __ push(tmp);
+    __ move(tmp, index);
+    assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
+    __ load_resolved_reference_at_index(index, tmp);
+    __ pop(tmp);
+    __ push(index);  // push appendix (MethodType, CallSite, etc.)
+    __ bind(L_no_push);
+  }
+
+  // load receiver if needed (after appendix is pushed so parameter size is correct)
+  // Note: no return address pushed yet
+  if (load_receiver) {
+    __ li(AT, ConstantPoolCacheEntry::parameter_size_mask);
+    __ andr(recv, flags, AT);
+    // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0.
+    const int no_return_pc_pushed_yet = 0;  // argument slot correction before we push return address
+    const int receiver_is_at_end = -1;      // back off one slot to get receiver
+    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
+    __ ld_d(recv, recv_addr);
+    __ verify_oop(recv);
+  }
+  if(save_flags) {
+    __ move(BCP, flags);
+  }
+
+  // compute return type
+  __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
+  __ andi(flags, flags, 0xf);
+
+  // Make sure we don't need to mask flags for tos_state_shift after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
+  // load return address
+  {
+    const address table = (address) Interpreter::invoke_return_entry_table_for(code);
+    __ li(AT, (long)table);
+    __ slli_d(flags, flags, LogBytesPerWord);
+    __ add_d(AT, AT, flags);
+    __ ld_d(RA, AT, 0);
+  }
+
+  if (save_flags) {
+    __ move(flags, BCP);
+    __ restore_bcp();
+  }
+}
+
+// used registers : T0, T3, T1, T2
+// T3 : recv; these registers follow the convention set up by prepare_invoke
+// T1 : flags, klass
+// Rmethod : method, index must be Rmethod
+void TemplateTable::invokevirtual_helper(Register index,
+                                         Register recv,
+                                         Register flags) {
+
+  assert_different_registers(index, recv, flags, T2);
+
+  // Test for an invoke of a final method
+  Label notFinal;
+  __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
+  __ andr(AT, flags, AT);
+  __ beq(AT, R0, notFinal);
+
+  Register method = index;  // method must be Rmethod
+  assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention");
+
+  // do the call - the index register actually holds the method to call,
+  // because for a vfinal entry the f2 field holds the Method* directly,
+  // see ConstantPoolCacheEntry::set_method for more info
+
+  __ verify_oop(method);
+
+  // It's final, need a null check here!
+ __ null_check(recv); + + // profile this call + __ profile_final_call(T2); + + // T2: tmp, used for mdp + // method: callee + // T4: tmp + // is_virtual: true + __ profile_arguments_type(T2, method, T4, true); + + __ jump_from_interpreted(method, T2); + + __ bind(notFinal); + + // get receiver klass + __ null_check(recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(T2, recv); + __ verify_oop(T2); + + // profile this call + __ profile_virtual_call(T2, T0, T1); + + // get target methodOop & entry point + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + // T2: receiver + __ alsl_d(AT, index, T2, Address::times_ptr - 1); + //this is a ualign read + __ ld_d(method, AT, base + vtableEntry::method_offset_in_bytes()); + __ profile_arguments_type(T2, method, T4, true); + __ jump_from_interpreted(method, T2); +} + +void TemplateTable::invokevirtual(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); + // now recv & flags in T3, T1 + invokevirtual_helper(Rmethod, T3, T1); +} + +// T4 : entry +// Rmethod : method +void TemplateTable::invokespecial(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3); + // now recv & flags in T3, T1 + __ verify_oop(T3); + __ null_check(T3); + __ profile_call(T4); + + // T8: tmp, used for mdp + // Rmethod: callee + // T4: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T4, false); + + __ jump_from_interpreted(Rmethod, T4); + __ move(T0, T3); +} + +void TemplateTable::invokestatic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG); + __ verify_oop(Rmethod); + + __ profile_call(T4); + + // T8: tmp, used for mdp + // Rmethod: callee + // T4: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T4, false); + + __ jump_from_interpreted(Rmethod, T4); +} + +// i have no idea what to do here, now. for future change. FIXME. +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + __ stop("fast_invokevfinal not used on LoongArch64"); +} + +// used registers : T0, T1, T2, T3, T1, A7 +// T0 : itable, vtable, entry +// T1 : interface +// T3 : receiver +// T1 : flags, klass +// Rmethod : index, method, this is required by interpreter_entry +void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); + //this method will use T1-T4 and T0 + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, T2, Rmethod, T3, T1); + // T2: reference klass + // Rmethod: method + // T3: receiver + // T1: flags + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCacheOop.cpp for details. + // This code isn't produced by javac, but could be produced by + // another compliant java compiler. + Label notMethod; + __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); + __ andr(AT, T1, AT); + __ beq(AT, R0, notMethod); + + invokevirtual_helper(Rmethod, T3, T1); + __ bind(notMethod); + // Get receiver klass into T1 - also a null check + //add for compressedoops + __ load_klass(T1, T3); + __ verify_oop(T1); + + Label no_such_interface, no_such_method; + + // Receiver subtype check against REFC. + // Superklass in T2. Subklass in T1. 
+ __ lookup_interface_method(// inputs: rec. class, interface, itable index + T1, T2, noreg, + // outputs: scan temp. reg, scan temp. reg + T0, FSR, + no_such_interface, + /*return_method=*/false); + + // profile this call + __ profile_virtual_call(T1, T0, FSR); + + // Get declaring interface class from method, and itable index + __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); + __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); + __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); + __ sub_w(Rmethod, R0, Rmethod); + + __ lookup_interface_method(// inputs: rec. class, interface, itable index + T1, T2, Rmethod, + // outputs: method, scan temp. reg + Rmethod, T0, + no_such_interface); + + // Rmethod: Method* to call + // T3: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ beq(Rmethod, R0, no_such_method); + + __ profile_arguments_type(T1, Rmethod, T0, true); + + // do the call + // T3: receiver + // Rmethod: Method* + __ jump_from_interpreted(Rmethod, T1); + __ should_not_reach_here(); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + __ bind(no_such_interface); + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); +} + + +void TemplateTable::invokehandle(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + const Register T2_method = Rmethod; + const Register FSR_mtype = FSR; + const Register T3_recv = T3; + + if (!EnableInvokeDynamic) { + // rewriter does not generate this bytecode + __ should_not_reach_here(); + return; + } + + prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); + //??__ verify_method_ptr(T2_method); + __ verify_oop(T3_recv); + __ null_check(T3_recv); + + // T4: MethodType object (from cpool->resolved_references[f1], if necessary) + // T2_method: MH.invokeExact_MT method (from f2) + + // Note: T4 is already pushed (if necessary) by prepare_invoke + + // FIXME: profile the LambdaForm also + __ profile_final_call(T4); + + // T8: tmp, used for mdp + // T2_method: callee + // T4: tmp + // is_virtual: true + __ profile_arguments_type(T8, T2_method, T4, true); + + __ jump_from_interpreted(T2_method, T4); +} + + void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + if (!EnableInvokeDynamic) { + // We should not encounter this bytecode if !EnableInvokeDynamic. + // The verifier will stop it. However, if we get past the verifier, + // this will stop the thread in a reasonable way, without crashing the JVM. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeError)); + // the call_VM checks for exception, so we should never return here. 
+ __ should_not_reach_here(); + return; + } + + const Register T2_callsite = T2; + + prepare_invoke(byte_no, Rmethod, T2_callsite); + + // T2: CallSite object (from cpool->resolved_references[f1]) + // Rmethod: MH.linkToCallSite method (from f2) + + // Note: T2_callsite is already pushed by prepare_invoke + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(T4); + + // T8: tmp, used for mdp + // Rmethod: callee + // T4: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T4, false); + + __ verify_oop(T2_callsite); + + __ jump_from_interpreted(Rmethod, T4); + } + +//----------------------------------------------------------------------------- +// Allocation +// T1 : tags & buffer end & thread +// T2 : object end +// T3 : klass +// T1 : object size +// A1 : cpool +// A2 : cp index +// return object in FSR +void TemplateTable::_new() { + transition(vtos, atos); + __ get_unsigned_2_byte_index_at_bcp(A2, 1); + + Label slow_case; + Label done; + Label initialize_header; + Label initialize_object; // including clearing the fields + Label allocate_shared; + + // get InstanceKlass in T3 + __ get_cpool_and_tags(A1, T1); + + __ alsl_d(AT, A2, A1, Address::times_8 - 1); + __ ld_d(T3, AT, sizeof(ConstantPool)); + + // make sure the class we're about to instantiate has been resolved. + // Note: slow_case does a pop of stack, which is why we loaded class/pushed above + const int tags_offset = Array::base_offset_in_bytes(); + __ add_d(T1, T1, A2); + __ ld_b(AT, T1, tags_offset); + if(os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + } + __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); + __ bne(AT, R0, slow_case); + + // make sure klass is initialized & doesn't have finalizer + // make sure klass is fully initialized + __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); + __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); + __ bne(AT, R0, slow_case); + + // has_finalizer + __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); + __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); + __ bne(AT, R0, slow_case); + + // Allocate the instance + // 1) Try to allocate in the TLAB + // 2) if fail and the object is large allocate in the shared Eden + // 3) if the above fails (or is not applicable), go to a slow case + // (creates a new TLAB, etc.) + + const bool allow_shared_alloc = + Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode; + +#ifndef OPT_THREAD + const Register thread = T8; + if (UseTLAB || allow_shared_alloc) { + __ get_thread(thread); + } +#else + const Register thread = TREG; +#endif + + if (UseTLAB) { + // get tlab_top + __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); + // get tlab_end + __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); + __ add_d(T2, FSR, T0); + __ blt(AT, T2, allow_shared_alloc ? 
allocate_shared : slow_case); + __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); + + if (ZeroTLAB) { + // the fields have been already cleared + __ beq(R0, R0, initialize_header); + } else { + // initialize both the header and fields + __ beq(R0, R0, initialize_object); + } + } + + // Allocation in the shared Eden , if allowed + // T0 : instance size in words + if(allow_shared_alloc){ + __ bind(allocate_shared); + + Label done, retry; + Address heap_top(T1); + __ li(T1, (long)Universe::heap()->top_addr()); + __ ld_d(FSR, heap_top); + + __ bind(retry); + __ li(AT, (long)Universe::heap()->end_addr()); + __ ld_d(AT, AT, 0); + __ add_d(T2, FSR, T0); + __ blt(AT, T2, slow_case); + + // Compare FSR with the top addr, and if still equal, store the new + // top addr in T2 at the address of the top addr pointer. Sets AT if was + // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. + // + // FSR: object begin + // T2: object end + // T0: instance size in words + + // if someone beat us on the allocation, try again, otherwise continue + __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); + + __ bind(done); + __ incr_allocated_bytes(thread, T0, 0); + } + + if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. + __ bind(initialize_object); + __ li(AT, - sizeof(oopDesc)); + __ add_d(T0, T0, AT); + __ beq(T0, R0, initialize_header); + + // initialize remaining object fields: T0 is a multiple of 2 + { + Label loop; + __ add_d(T1, FSR, T0); + __ addi_d(T1, T1, -oopSize); + + __ bind(loop); + __ st_d(R0, T1, sizeof(oopDesc) + 0 * oopSize); + Label L1; + __ beq(T1, FSR, L1); //dont clear header + __ addi_d(T1, T1, -oopSize); + __ b(loop); + __ bind(L1); + __ addi_d(T1, T1, -oopSize); + } + + // klass in T3, + // initialize object header only. 
+  __ bind(initialize_header);
+  if (UseBiasedLocking) {
+    __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset()));
+    __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ());
+  } else {
+    __ li(AT, (long)markOopDesc::prototype());
+    __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes());
+  }
+
+  __ store_klass_gap(FSR, R0);
+  __ store_klass(FSR, T3);
+
+  {
+    SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
+    // Trigger dtrace event for fastpath
+    __ push(atos);
+    __ call_VM_leaf(
+        CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
+    __ pop(atos);
+
+  }
+  __ b(done);
+  }
+
+  // slow case
+  __ bind(slow_case);
+  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2);
+
+  // continue
+  __ bind(done);
+  __ membar(__ StoreStore);
+}
+
+void TemplateTable::newarray() {
+  transition(itos, atos);
+  __ ld_bu(A1, at_bcp(1));
+  // type, count
+  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
+  __ membar(__ StoreStore);
+}
+
+void TemplateTable::anewarray() {
+  transition(itos, atos);
+  __ get_2_byte_integer_at_bcp(A2, AT, 1);
+  __ huswap(A2);
+  __ get_constant_pool(A1);
+  // cp, index, count
+  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR);
+  __ membar(__ StoreStore);
+}
+
+void TemplateTable::arraylength() {
+  transition(atos, itos);
+  __ null_check(FSR, arrayOopDesc::length_offset_in_bytes());
+  __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes());
+}
+
+// When gen_subtype_check is invoked: super klass in T3, sub klass in T2, object always in FSR
+// T2 : sub klass
+// T3 : cpool
+// T3 : super klass
+void TemplateTable::checkcast() {
+  transition(atos, atos);
+  Label done, is_null, ok_is_subtype, quicked, resolved;
+  __ beq(FSR, R0, is_null);
+
+  // Get cpool & tags index
+  __ get_cpool_and_tags(T3, T1);
+  __ get_2_byte_integer_at_bcp(T2, AT, 1);
+  __ huswap(T2);
+
+  // See if bytecode has already been quicked
+  __ add_d(AT, T1, T2);
+  __ ld_b(AT, AT, Array<u1>::base_offset_in_bytes());
+  if(os::is_MP()) {
+    __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore));
+  }
+  __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class);
+  __ beq(AT, R0, quicked);
+
+  // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded,
+  // and a GC may move the object in V0 to another place in the heap.
+  // Therefore we must not keep such an object in a register; instead we
+  // save it on the stack, where the GC can update it automatically.
+  // After the call, the (possibly relocated) object is popped back into FSR.
+  //
+  __ push(atos);
+  const Register thread = TREG;
+#ifndef OPT_THREAD
+  __ get_thread(thread);
+#endif
+  call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+  __ get_vm_result_2(T3, thread);
+  __ pop_ptr(FSR);
+  __ b(resolved);
+
+  // klass already in cp, get superklass in T3
+  __ bind(quicked);
+  __ alsl_d(AT, T2, T3, Address::times_8 - 1);
+  __ ld_d(T3, AT, sizeof(ConstantPool));
+
+  __ bind(resolved);
+
+  // get subklass in T2
+  __ load_klass(T2, FSR);
+  // Superklass in T3. Subklass in T2.
+  __ gen_subtype_check(T3, T2, ok_is_subtype);
+
+  // Come here on failure
+  // object is at FSR
+  __ jmp(Interpreter::_throw_ClassCastException_entry);
+
+  // Come here on success
+  __ bind(ok_is_subtype);
+
+  // Collect counts on whether this check-cast sees NULLs a lot or not.
+ if (ProfileInterpreter) { + __ b(done); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); + } + __ bind(done); +} + +// T3 as cpool, T1 as tags, T2 as index +// object always in FSR, superklass in T3, subklass in T2 +void TemplateTable::instanceof() { + transition(atos, itos); + Label done, is_null, ok_is_subtype, quicked, resolved; + + __ beq(FSR, R0, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(T3, T1); + // get index + __ get_2_byte_integer_at_bcp(T2, AT, 1); + __ hswap(T2); + + // See if bytecode has already been quicked + // quicked + __ add_d(AT, T1, T2); + __ ld_b(AT, AT, Array::base_offset_in_bytes()); + if(os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + } + __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); + __ beq(AT, R0, quicked); + + __ push(atos); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(T3, thread); + __ pop_ptr(FSR); + __ b(resolved); + + // get superklass in T3, subklass in T2 + __ bind(quicked); + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T3, AT, sizeof(ConstantPool)); + + __ bind(resolved); + // get subklass in T2 + __ load_klass(T2, FSR); + + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); + // Come here on failure + __ move(FSR, R0); + __ b(done); + + // Come here on success + __ bind(ok_is_subtype); + __ li(FSR, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ beq(R0, R0, done); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); + // FSR = 0: obj == NULL or obj is not an instanceof the specified klass + // FSR = 1: obj != NULL and obj is an instanceof the specified klass +} + +//-------------------------------------------------------- +//-------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + // Note: We get here even if we are single stepping.. + // jbug inists on setting breakpoints at every bytecode + // even if we are in single step mode. + + transition(vtos, vtos); + + // get the unpatched byte code + __ get_method(A1); + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::get_original_bytecode_at), + A1, BCP); + __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal + + // post the breakpoint event + __ get_method(A1); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); + + // complete the execution of original bytecode + __ dispatch_only_normal(vtos); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + __ null_check(FSR); + __ jmp(Interpreter::throw_exception_entry()); +} + +//----------------------------------------------------------------------------- +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// [expressions ] <--- SP = expression stack top +// .. +// [expressions ] +// [monitor entry] <--- monitor block top = expression stack bot +// .. +// [monitor entry] +// [frame data ] <--- monitor block bot +// ... 
+// [return addr ] <--- FP + +// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer +// object always in FSR +void TemplateTable::monitorenter() { + transition(atos, vtos); + + // check for NULL object + __ null_check(FSR); + + const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset + * wordSize); + const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); + Label allocated; + + // initialize entry pointer + __ move(c_rarg0, R0); + + // find a free slot in the monitor block (result in c_rarg0) + { + Label entry, loop, exit, next; + __ ld_d(T2, monitor_block_top); + __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + __ b(entry); + + // free slot? + __ bind(loop); + __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); + __ bne(AT, R0, next); + __ move(c_rarg0, T2); + + __ bind(next); + __ beq(FSR, AT, exit); + __ addi_d(T2, T2, entry_size); + + __ bind(entry); + __ bne(T3, T2, loop); + __ bind(exit); + } + + __ bne(c_rarg0, R0, allocated); + + // allocate one if there's no free slot + { + Label entry, loop; + // 1. compute new pointers // SP: old expression stack top + __ ld_d(c_rarg0, monitor_block_top); + __ addi_d(SP, SP, -entry_size); + __ addi_d(c_rarg0, c_rarg0, -entry_size); + __ st_d(c_rarg0, monitor_block_top); + __ move(T3, SP); + __ b(entry); + + // 2. move expression stack contents + __ bind(loop); + __ ld_d(AT, T3, entry_size); + __ st_d(AT, T3, 0); + __ addi_d(T3, T3, wordSize); + __ bind(entry); + __ bne(T3, c_rarg0, loop); + } + + __ bind(allocated); + // Increment bcp to point to the next bytecode, + // so exception handling for async. exceptions work correctly. + // The object has already been poped from the stack, so the + // expression stack looks correct. + __ addi_d(BCP, BCP, 1); + __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ lock_object(c_rarg0); + // check to make sure this monitor doesn't cause stack overflow after locking + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + // The bcp has already been incremented. Just need to dispatch to next instruction. + + __ dispatch_next(vtos); +} + +// T2 : top +// c_rarg0 : entry +void TemplateTable::monitorexit() { + transition(atos, vtos); + + __ null_check(FSR); + + const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); + Label found; + + // find matching slot + { + Label entry, loop; + __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + __ b(entry); + + __ bind(loop); + __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ beq(FSR, AT, found); + __ addi_d(c_rarg0, c_rarg0, entry_size); + __ bind(entry); + __ bne(T2, c_rarg0, loop); + } + + // error handling. 
Unlocking was not block-structured + Label end; + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + // call run-time routine + // c_rarg0: points to monitor entry + __ bind(found); + __ move(TSR, FSR); + __ unlock_object(c_rarg0); + __ move(FSR, TSR); + __ bind(end); +} + + +// Wide instructions +void TemplateTable::wide() { + transition(vtos, vtos); + __ ld_bu(Rnext, at_bcp(1)); + __ slli_d(T4, Rnext, Address::times_8); + __ li(AT, (long)Interpreter::_wentry_point); + __ add_d(AT, T4, AT); + __ ld_d(T4, AT, 0); + __ jr(T4); +} + + +void TemplateTable::multianewarray() { + transition(vtos, atos); + // last dim is on top of stack; we want address of first one: + // first_addr = last_addr + (ndims - 1) * wordSize + __ ld_bu(A1, at_bcp(3)); // dimension + __ addi_d(A1, A1, -1); + __ slli_d(A1, A1, Address::times_8); + __ add_d(A1, SP, A1); // now A1 pointer to the count array on the stack + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); + __ ld_bu(AT, at_bcp(3)); + __ slli_d(AT, AT, Address::times_8); + __ add_d(SP, SP, AT); + __ membar(__ AnyAny);//no membar here for aarch64 +} +#endif // !CC_INTERP diff --git a/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp new file mode 100644 index 00000000000..c48d76e0a2a --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP +#define CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP + + static void prepare_invoke(int byte_no, + Register method, + Register index = noreg, + Register recv = noreg, + Register flags = noreg + ); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + //static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); + static void volatile_barrier(); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + +#endif // CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP diff --git a/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp new file mode 100644 index 00000000000..7c3ce68010d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ \
+  /******************************/ \
+  /* JavaCallWrapper            */ \
+  /******************************/ \
+  /******************************/ \
+  /* JavaFrameAnchor            */ \
+  /******************************/ \
+  volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \
+ \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_STRUCTS_OS_CPU macro (and must */
+  /* be present there) */
+
+
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_TYPES_OS_CPU macro (and must */
+  /* be present there) */
+
+
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */
+  /* be present there) */
+
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */
+  /* be present there) */
+
+#endif // CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP
diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp
new file mode 100644
index 00000000000..c71f64e132b
--- /dev/null
+++ b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +#include "memory/allocation.inline.hpp" +#include "vm_version_ext_loongarch.hpp" + +// VM_Version_Ext statics +int VM_Version_Ext::_no_of_threads = 0; +int VM_Version_Ext::_no_of_cores = 0; +int VM_Version_Ext::_no_of_sockets = 0; +bool VM_Version_Ext::_initialized = false; +char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; +char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; + +void VM_Version_Ext::initialize_cpu_information(void) { + // do nothing if cpu info has been initialized + if (_initialized) { + return; + } + + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", cpu_features()); + _initialized = true; +} + +int VM_Version_Ext::number_of_threads(void) { + initialize_cpu_information(); + return _no_of_threads; +} + +int VM_Version_Ext::number_of_cores(void) { + initialize_cpu_information(); + return _no_of_cores; +} + +int VM_Version_Ext::number_of_sockets(void) { + initialize_cpu_information(); + return _no_of_sockets; +} + +const char* VM_Version_Ext::cpu_name(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); + return tmp; +} + +const char* VM_Version_Ext::cpu_description(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); + return tmp; +} diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp new file mode 100644 index 00000000000..682dd9c78ff --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP + +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" + +class VM_Version_Ext : public VM_Version { + private: + static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; + static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + + static int _no_of_threads; + static int _no_of_cores; + static int _no_of_sockets; + static bool _initialized; + static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; + static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + + public: + static int number_of_threads(void); + static int number_of_cores(void); + static int number_of_sockets(void); + + static const char* cpu_name(void); + static const char* cpu_description(void); + static void initialize_cpu_information(void); +}; + +#endif // CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp new file mode 100644 index 00000000000..81ea3b230cb --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp @@ -0,0 +1,443 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/java.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "vm_version_loongarch.hpp" +#ifdef TARGET_OS_FAMILY_linux +# include "os_linux.inline.hpp" +#endif + +#include +#include + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +int VM_Version::_cpuFeatures; +unsigned long VM_Version::auxv; +const char* VM_Version::_features_str = ""; +VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; +bool VM_Version::_cpu_info_is_initialized = false; + +static BufferBlob* stub_blob; +static const int stub_size = 600; + +extern "C" { + typedef void (*get_cpu_info_stub_t)(void*); +} +static get_cpu_info_stub_t get_cpu_info_stub = NULL; + + +class VM_Version_StubGenerator: public StubCodeGenerator { + public: + + VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} + + address generate_get_cpu_info() { + assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); + StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); +# define __ _masm-> + + address start = __ pc(); + + __ enter(); + __ push(AT); + __ push(T5); + + __ li(AT, (long)0); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); + + __ li(AT, 1); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); + + __ li(AT, 2); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); + + __ li(AT, 3); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); + + __ li(AT, 4); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); + + __ li(AT, 5); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); + + __ li(AT, 6); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); + + __ li(AT, 10); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); + + __ li(AT, 11); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); + + __ li(AT, 12); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); + + __ li(AT, 13); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); + + __ li(AT, 14); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); + + __ pop(T5); + __ pop(AT); + __ leave(); + __ jr(RA); +# undef __ + return start; + }; +}; + +uint32_t VM_Version::get_feature_flags_by_cpucfg() { + uint32_t result = 0; + if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { + result |= CPU_LA32; + } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { + result |= CPU_LA64; + } + if (_cpuid_info.cpucfg_info_id1.bits.UAL != 0) + result |= CPU_UAL; + + if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) + result |= CPU_FP; + if (_cpuid_info.cpucfg_info_id2.bits.COMPLEX != 0) + result |= CPU_COMPLEX; + if (_cpuid_info.cpucfg_info_id2.bits.CRYPTO != 0) + result |= CPU_CRYPTO; + if (_cpuid_info.cpucfg_info_id2.bits.LBT_X86 != 0) + result |= 
CPU_LBT_X86; + if (_cpuid_info.cpucfg_info_id2.bits.LBT_ARM != 0) + result |= CPU_LBT_ARM; + if (_cpuid_info.cpucfg_info_id2.bits.LBT_MIPS != 0) + result |= CPU_LBT_MIPS; + if (_cpuid_info.cpucfg_info_id2.bits.LAM != 0) + result |= CPU_LAM; + + if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) + result |= CPU_CCDMA; + if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) + result |= CPU_LLDBAR; + if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) + result |= CPU_SCDLY; + if (_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) + result |= CPU_LLEXC; + + result |= CPU_ULSYNC; + + return result; +} + +void VM_Version::get_processor_features() { + + clean_cpuFeatures(); + + get_cpu_info_stub(&_cpuid_info); + _cpuFeatures = get_feature_flags_by_cpucfg(); + + _supports_cx8 = true; + + if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { + FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); + } + + auxv = getauxval(AT_HWCAP); + + if (supports_lsx()) { + if (FLAG_IS_DEFAULT(UseLSX)) { + FLAG_SET_DEFAULT(UseLSX, true); + } + } else if (UseLSX) { + warning("LSX instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLSX, false); + } + + if (supports_lasx()) { + if (FLAG_IS_DEFAULT(UseLASX)) { + FLAG_SET_DEFAULT(UseLASX, true); + } + } else if (UseLASX) { + warning("LASX instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLASX, false); + } + + if (UseLASX && !UseLSX) { + warning("LASX instructions depends on LSX, setting UseLASX to false"); + FLAG_SET_DEFAULT(UseLASX, false); + } + +#ifdef COMPILER2 + int max_vector_size = 0; + int min_vector_size = 0; + if (UseLASX) { + max_vector_size = 32; + min_vector_size = 16; + } + else if (UseLSX) { + max_vector_size = 16; + min_vector_size = 16; + } + + if (!FLAG_IS_DEFAULT(MaxVectorSize)) { + if (MaxVectorSize == 0) { + // do nothing + } else if (MaxVectorSize > max_vector_size) { + warning("MaxVectorSize must be at most %i on this platform", max_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } else if (MaxVectorSize < min_vector_size) { + warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); + } else if (!is_power_of_2(MaxVectorSize)) { + warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } + } else { + // If default, use highest supported configuration + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } +#endif + + if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 1000); + } + } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 2000); + } + } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 3000); + } + } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 4000); + } + } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 10000); + } + } else { + assert(false, "Should Not Reach Here, what is the cpu type?"); + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 10000); + } + } + + char buf[256]; + + // A note on the _features_string format: + // There are jtreg tests checking the 
_features_string for various properties. + // For some strange reason, these tests require the string to contain + // only _lowercase_ characters. Keep that in mind when being surprised + // about the unusual notation of features - and when adding new ones. + // Features may have one comma at the end. + // Furthermore, use one, and only one, separator space between features. + // Multiple spaces are considered separate tokens, messing up everything. + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " + "0x%lx, fp_ver: %d, lvz_ver: %d, " + "usesynclevel:%d", + (is_la64() ? "la64" : ""), + (is_la32() ? "la32" : ""), + (supports_lsx() ? ", lsx" : ""), + (supports_lasx() ? ", lasx" : ""), + (supports_crypto() ? ", crypto" : ""), + (supports_lam() ? ", am" : ""), + (supports_ual() ? ", ual" : ""), + (supports_lldbar() ? ", lldbar" : ""), + (supports_scdly() ? ", scdly" : ""), + (supports_llexc() ? ", llexc" : ""), + (supports_lbt_x86() ? ", lbt_x86" : ""), + (supports_lbt_arm() ? ", lbt_arm" : ""), + (supports_lbt_mips() ? ", lbt_mips" : ""), + (needs_llsync() ? ", needs_llsync" : ""), + (needs_tgtsync() ? ", needs_tgtsync": ""), + (needs_ulsync() ? ", needs_ulsync": ""), + _cpuid_info.cpucfg_info_id0.bits.PRID, + _cpuid_info.cpucfg_info_id2.bits.FP_VER, + _cpuid_info.cpucfg_info_id2.bits.LVZ_VER, + UseSyncLevel); + _features_str = strdup(buf); + + assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); + assert( is_la64(), "Should be LoongArch64"); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { + FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); + } + + if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { + FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); + } + + // Basic instructions are used to implement SHA Intrinsics on LA, so sha + // instructions support is not needed. + if (/*supports_crypto()*/ 1) { + if (FLAG_IS_DEFAULT(UseSHA)) { + FLAG_SET_DEFAULT(UseSHA, true); + } + } else if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (UseSHA/* && supports_crypto()*/) { + if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); + } + } else if (UseSHA1Intrinsics) { + warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + + if (UseSHA/* && supports_crypto()*/) { + if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); + } + } else if (UseSHA256Intrinsics) { + warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + + if (UseSHA512Intrinsics) { + warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA, false); + } + + // Basic instructions are used to implement AES Intrinsics on LA, so AES + // instructions support is not needed. 
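The UseLSX/UseLASX defaulting earlier in get_processor_features() keys off the kernel's auxiliary vector: the code reads getauxval(AT_HWCAP), and supports_lsx()/supports_lasx() test bits 4 and 5, matching the HWCAP_LOONGARCH_LSX/LASX fallback definitions in vm_version_loongarch.hpp. As a standalone illustration only (not part of the patch; the header choice and output format are this sketch's assumptions), the same probe outside the VM could look like this:

  #include <sys/auxv.h>   // getauxval, AT_HWCAP (assumed Linux/glibc environment)
  #include <cstdio>

  #ifndef HWCAP_LOONGARCH_LSX
  #define HWCAP_LOONGARCH_LSX  (1 << 4)   // same fallback value as vm_version_loongarch.hpp
  #endif
  #ifndef HWCAP_LOONGARCH_LASX
  #define HWCAP_LOONGARCH_LASX (1 << 5)
  #endif

  int main() {
    unsigned long hwcap = getauxval(AT_HWCAP);          // kernel-reported CPU features
    std::printf("LSX:  %s\n", (hwcap & HWCAP_LOONGARCH_LSX)  ? "available" : "not available");
    std::printf("LASX: %s\n", (hwcap & HWCAP_LOONGARCH_LASX) ? "available" : "not available");
    return 0;
  }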
+ if (/*supports_crypto()*/ 1) { + if (FLAG_IS_DEFAULT(UseAES)) { + FLAG_SET_DEFAULT(UseAES, true); + } + } else if (UseAES) { + if (!FLAG_IS_DEFAULT(UseAES)) + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + + if (UseAES/* && supports_crypto()*/) { + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + FLAG_SET_DEFAULT(UseAESIntrinsics, true); + } + } else if (UseAESIntrinsics) { + if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + + if (FLAG_IS_DEFAULT(UseCRC32)) { + FLAG_SET_DEFAULT(UseCRC32, true); + } + + if (UseCRC32) { + if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + UseCRC32Intrinsics = true; + } + } + + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } + + // This machine allows unaligned memory accesses + if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { + FLAG_SET_DEFAULT(UseUnalignedAccesses, true); + } + + if (CriticalJNINatives) { + if (FLAG_IS_CMDLINE(CriticalJNINatives)) { + warning("CriticalJNINatives specified, but not supported in this VM"); + } + FLAG_SET_DEFAULT(CriticalJNINatives, false); + } +} + +void VM_Version::initialize() { + ResourceMark rm; + // Making this stub must be FIRST use of assembler + + stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); + if (stub_blob == NULL) { + vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); + } + CodeBuffer c(stub_blob); + VM_Version_StubGenerator g(&c); + get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, + g.generate_get_cpu_info()); + + get_processor_features(); +} diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp new file mode 100644 index 00000000000..3b5f907a793 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp @@ -0,0 +1,299 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP + +#include "runtime/globals_extension.hpp" +#include "runtime/vm_version.hpp" + +#ifndef HWCAP_LOONGARCH_LSX +#define HWCAP_LOONGARCH_LSX (1 << 4) +#endif + +#ifndef HWCAP_LOONGARCH_LASX +#define HWCAP_LOONGARCH_LASX (1 << 5) +#endif + +class VM_Version: public Abstract_VM_Version { +public: + + union LoongArch_Cpucfg_Id0 { + uint32_t value; + struct { + uint32_t PRID : 32; + } bits; + }; + + union LoongArch_Cpucfg_Id1 { + uint32_t value; + struct { + uint32_t ARCH : 2, + PGMMU : 1, + IOCSR : 1, + PALEN : 8, + VALEN : 8, + UAL : 1, // unaligned access + RI : 1, + EP : 1, + RPLV : 1, + HP : 1, + IOCSR_BRD : 1, + MSG_INT : 1, + : 5; + } bits; + }; + + union LoongArch_Cpucfg_Id2 { + uint32_t value; + struct { + uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead + FP_SP : 1, + FP_DP : 1, + FP_VER : 3, + LSX : 1, + LASX : 1, + COMPLEX : 1, + CRYPTO : 1, + LVZ : 1, + LVZ_VER : 3, + LLFTP : 1, + LLFTP_VER : 3, + LBT_X86 : 1, + LBT_ARM : 1, + LBT_MIPS : 1, + LSPW : 1, + LAM : 1, + : 9; + } bits; + }; + + union LoongArch_Cpucfg_Id3 { + uint32_t value; + struct { + uint32_t CCDMA : 1, + SFB : 1, + UCACC : 1, + LLEXC : 1, + SCDLY : 1, + LLDBAR : 1, + ITLBHMC : 1, + ICHMC : 1, + SPW_LVL : 3, + SPW_HP_HF : 1, + RVA : 1, + RVAMAXM1 : 4, + : 15; + } bits; + }; + + union LoongArch_Cpucfg_Id4 { + uint32_t value; + struct { + uint32_t CC_FREQ : 32; + } bits; + }; + + union LoongArch_Cpucfg_Id5 { + uint32_t value; + struct { + uint32_t CC_MUL : 16, + CC_DIV : 16; + } bits; + }; + + union LoongArch_Cpucfg_Id6 { + uint32_t value; + struct { + uint32_t PMP : 1, + PMVER : 3, + PMNUM : 4, + PMBITS : 6, + UPM : 1, + : 17; + } bits; + }; + + union LoongArch_Cpucfg_Id10 { + uint32_t value; + struct { + uint32_t L1IU_PRESENT : 1, + L1IU_UNIFY : 1, + L1D_PRESENT : 1, + L2IU_PRESENT : 1, + L2IU_UNIFY : 1, + L2IU_PRIVATE : 1, + L2IU_INCLUSIVE : 1, + L2D_PRESENT : 1, + L2D_PRIVATE : 1, + L2D_INCLUSIVE : 1, + L3IU_PRESENT : 1, + L3IU_UNIFY : 1, + L3IU_PRIVATE : 1, + L3IU_INCLUSIVE : 1, + L3D_PRESENT : 1, + L3D_PRIVATE : 1, + L3D_INCLUSIVE : 1, + : 15; + } bits; + }; + + union LoongArch_Cpucfg_Id11 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + + union LoongArch_Cpucfg_Id12 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + + union LoongArch_Cpucfg_Id13 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + + union LoongArch_Cpucfg_Id14 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + +protected: + + enum { + CPU_LAM = (1 << 1), + CPU_UAL = (1 << 2), + CPU_LSX = (1 << 4), + CPU_LASX = (1 << 5), + CPU_COMPLEX = (1 << 7), + CPU_CRYPTO = (1 << 8), + CPU_LBT_X86 = (1 << 10), + CPU_LBT_ARM = (1 << 11), + CPU_LBT_MIPS = (1 << 12), + /* flags above must follow Linux HWCAP */ + CPU_LA32 = (1 << 13), + CPU_LA64 = (1 << 14), + CPU_FP = (1 << 15), + CPU_LLEXC = (1 << 16), + CPU_SCDLY = (1 << 17), + CPU_LLDBAR = (1 << 18), + CPU_CCDMA = (1 << 19), + CPU_LLSYNC = (1 << 20), + CPU_TGTSYNC = (1 << 21), + CPU_ULSYNC = (1 << 22), + + //////////////////////add some other feature here////////////////// + } cpuFeatureFlags; + + static int _cpuFeatures; + static unsigned long auxv; + static const char* _features_str; + static bool _cpu_info_is_initialized; + + struct 
CpuidInfo { + LoongArch_Cpucfg_Id0 cpucfg_info_id0; + LoongArch_Cpucfg_Id1 cpucfg_info_id1; + LoongArch_Cpucfg_Id2 cpucfg_info_id2; + LoongArch_Cpucfg_Id3 cpucfg_info_id3; + LoongArch_Cpucfg_Id4 cpucfg_info_id4; + LoongArch_Cpucfg_Id5 cpucfg_info_id5; + LoongArch_Cpucfg_Id6 cpucfg_info_id6; + LoongArch_Cpucfg_Id10 cpucfg_info_id10; + LoongArch_Cpucfg_Id11 cpucfg_info_id11; + LoongArch_Cpucfg_Id12 cpucfg_info_id12; + LoongArch_Cpucfg_Id13 cpucfg_info_id13; + LoongArch_Cpucfg_Id14 cpucfg_info_id14; + }; + + // The actual cpuid info block + static CpuidInfo _cpuid_info; + + static uint32_t get_feature_flags_by_cpucfg(); + static int get_feature_flags_by_cpuinfo(int features); + static void get_processor_features(); + +public: + // Offsets for cpuid asm stub + static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } + static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } + static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } + static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } + static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } + static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } + static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } + static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } + static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } + static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } + static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } + static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } + + static void clean_cpuFeatures() { _cpuFeatures = 0; } + + // Initialization + static void initialize(); + + static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } + + static bool is_la32() { return _cpuFeatures & CPU_LA32; } + static bool is_la64() { return _cpuFeatures & CPU_LA64; } + static bool supports_crypto() { return _cpuFeatures & CPU_CRYPTO; } + static bool supports_lsx() { return auxv & HWCAP_LOONGARCH_LSX; } + static bool supports_lasx() { return auxv & HWCAP_LOONGARCH_LASX; } + static bool supports_lam() { return _cpuFeatures & CPU_LAM; } + static bool supports_llexc() { return _cpuFeatures & CPU_LLEXC; } + static bool supports_scdly() { return _cpuFeatures & CPU_SCDLY; } + static bool supports_lldbar() { return _cpuFeatures & CPU_LLDBAR; } + static bool supports_ual() { return _cpuFeatures & CPU_UAL; } + static bool supports_lbt_x86() { return _cpuFeatures & CPU_LBT_X86; } + static bool supports_lbt_arm() { return _cpuFeatures & CPU_LBT_ARM; } + static bool supports_lbt_mips() { return _cpuFeatures & CPU_LBT_MIPS; } + static bool needs_llsync() { return !supports_lldbar(); } + static bool needs_tgtsync() { return 1; } + static bool needs_ulsync() { return 1; } + + static const char* cpu_features() { return _features_str; } +}; + +#endif // CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp new file mode 100644 index 00000000000..52bccfc1834 --- /dev/null +++ 
b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "code/vmreg.hpp" + + + +void VMRegImpl::set_regName() { + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { + regName[i++] = reg->name(); + regName[i++] = reg->name(); + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + regName[i++] = freg->name(); + regName[i++] = freg->name(); + freg = freg->successor(); + } + + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + regName[i] = "NON-GPR-FPR"; + } +} diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp new file mode 100644 index 00000000000..80a1fc57de5 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP + +bool is_Register(); +Register as_Register(); + +bool is_FloatRegister(); +FloatRegister as_FloatRegister(); + +#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp new file mode 100644 index 00000000000..f822d4c355b --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); + return VMRegImpl::as_VMReg(encoding() << 1 ); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); +} + +inline bool VMRegImpl::is_Register() { + return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; +} + +inline bool VMRegImpl::is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + +inline Register VMRegImpl::as_Register() { + + assert( is_Register(), "must be"); + return ::as_Register(value() >> 1); +} + +inline FloatRegister VMRegImpl::as_FloatRegister() { + assert( is_FloatRegister(), "must be" ); + assert( is_even(value()), "must be" ); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); +} + +inline bool VMRegImpl::is_concrete() { + assert(is_reg(), "must be"); + if(is_Register()) return true; + if(is_FloatRegister()) return true; + assert(false, "what register?"); + return false; +} + +#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp new file mode 100644 index 00000000000..df0d176b8bd --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_loongarch_64.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_loongarch.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + + +// machine-dependent part of VtableStubs: create VtableStub of correct size and +// initialize its code + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, + oop receiver, + int index); +#endif + +// used by the compiler only; receiver in T0. +// used registers: +// Rmethod : receiver klass & method +// NOTE: If this code is used by the C1, the receiver_location is always 0.
+// when reach here, receiver in T0, klass in T8 +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + const int la_code_length = VtableStub::pd_code_size_limit(true); + VtableStub* s = new(la_code_length) VtableStub(true, vtable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), la_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + Register t1 = T8, t2 = Rmethod; +#ifndef PRODUCT + if (CountCompiledCalls) { + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + __ ld_w(t1, AT , 0); + __ addi_w(t1, t1, 1); + __ st_w(t1, AT,0); + } +#endif + + // get receiver (need to skip return address on top of stack) + //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); + + // get receiver klass + address npe_addr = __ pc(); + __ load_klass(t1, T0); + // compute entry offset (in words) + int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size(); +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // check offset vs vtable length + __ ld_w(t2, t1, InstanceKlass::vtable_length_offset()*wordSize); + assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); + __ li(AT, vtable_index*vtableEntry::size()); + __ blt(AT, t2, L); + __ li(A2, vtable_index); + __ move(A1, A0); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); + __ bind(L); + } +#endif // PRODUCT + // load methodOop and target address + const Register method = Rmethod; + int offset = entry_offset*wordSize + vtableEntry::method_offset_in_bytes(); + if (Assembler::is_simm(offset, 12)) { + __ ld_ptr(method, t1, offset); + } else { + __ li(AT, offset); + __ ld_ptr(method, t1, AT); + } + if (DebugVtables) { + Label L; + __ beq(method, R0, L); + __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L); + __ stop("Vtable entry is NULL"); + __ bind(L); + } + // T8: receiver klass + // T0: receiver + // Rmethod: methodOop + // T4: entry + address ame_addr = __ pc(); + __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); + __ jr(T4); + masm->flush(); + s->set_exception_points(npe_addr, ame_addr); + return s; +} + + +// used registers : +// T1 T2 +// when reach here, the receiver in T0, klass in T1 +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Note well: pd_code_size_limit is the absolute minimum we can get + // away with. If you add code here, bump the code stub size + // returned by pd_code_size_limit! 
+ const int la_code_length = VtableStub::pd_code_size_limit(false); + VtableStub* s = new(la_code_length) VtableStub(false, itable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), la_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + // we T8,T4 as temparary register, they are free from register allocator + Register t1 = T8, t2 = T2; + // Entry arguments: + // T1: Interface + // T0: Receiver + +#ifndef PRODUCT + if (CountCompiledCalls) { + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + __ ld_w(T8, AT, 0); + __ addi_w(T8, T8, 1); + __ st_w(T8, AT, 0); + } +#endif /* PRODUCT */ + const Register holder_klass_reg = T1; // declaring interface klass (DECC) + const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) + const Register icholder_reg = T1; + __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); + + // get receiver klass (also an implicit null-check) + address npe_addr = __ pc(); + __ load_klass(t1, T0); + { + // x86 use lookup_interface_method, but lookup_interface_method does not work on LoongArch. + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); + assert(Assembler::is_simm16(base), "change this code"); + __ addi_d(t2, t1, base); + assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); + __ ld_w(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); + __ alsl_d(t2, AT, t2, Address::times_8 - 1); + if (HeapWordsPerLong > 1) { + __ round_to(t2, BytesPerLong); + } + + Label hit, entry; + assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); + __ bind(entry); + +#ifdef ASSERT + // Check that the entry is non-null + if (DebugVtables) { + Label L; + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_w(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, R0, L); + __ stop("null entry point found in itable's offset table"); + __ bind(L); + } +#endif + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); + __ bne(AT, resolved_klass_reg, entry); + + } + + // add for compressedoops + __ load_klass(t1, T0); + // compute itable entry offset (in words) + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); + assert(Assembler::is_simm16(base), "change this code"); + __ addi_d(t2, t1, base); + assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); + __ ld_w(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); + __ alsl_d(t2, AT, t2, Address::times_8 - 1); + if (HeapWordsPerLong > 1) { + __ round_to(t2, BytesPerLong); + } + + Label hit, entry; + assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); + __ bind(entry); + +#ifdef ASSERT + // Check that the entry is non-null + if (DebugVtables) { + Label L; + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_w(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, R0, L); + __ stop("null entry point 
found in itable's offset table"); + __ bind(L); + } +#endif + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); + __ bne(AT, holder_klass_reg, entry); + + // We found a hit, move offset into T4 + __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); + + // Compute itableMethodEntry. + const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + + itableMethodEntry::method_offset_in_bytes(); + + // Get methodOop and entrypoint for compiler + const Register method = Rmethod; + + __ slli_d(AT, t2, Address::times_1); + __ add_d(AT, AT, t1 ); + if (Assembler::is_simm(method_offset, 12)) { + __ ld_ptr(method, AT, method_offset); + } else { + __ li(t1, method_offset); + __ ld_ptr(method, AT, t1); + } + +#ifdef ASSERT + if (DebugVtables) { + Label L1; + __ beq(method, R0, L1); + __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L1); + __ stop("methodOop is null"); + __ bind(L1); + } +#endif // ASSERT + + // Rmethod: methodOop + // T0: receiver + // T4: entry point + address ame_addr = __ pc(); + __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); + __ jr(T4); + masm->flush(); + s->set_exception_points(npe_addr, ame_addr); + return s; +} + +// NOTE : whenever you change the code above, dont forget to change the const here +int VtableStub::pd_code_size_limit(bool is_vtable_stub) { + if (is_vtable_stub) { + return ( DebugVtables ? 600 : 28) + (CountCompiledCalls ? 24 : 0)+ + (UseCompressedOops ? 16 : 0); + } else { + return ( DebugVtables ? 636 : 152) + (CountCompiledCalls ? 24 : 0)+ + (UseCompressedOops ? 32 : 0); + } +} + +int VtableStub::pd_code_alignment() { + return wordSize; +} diff --git a/hotspot/src/cpu/mips/vm/assembler_mips.cpp b/hotspot/src/cpu/mips/vm/assembler_mips.cpp new file mode 100644 index 00000000000..6c720972ad6 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/assembler_mips.cpp @@ -0,0 +1,774 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#ifndef PRODUCT +#include "compiler/disassembler.hpp" +#endif +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Implementation of AddressLiteral + +AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { + _is_lval = false; + _target = target; + _rspec = rspec_from_rtype(rtype, target); +} + +// Implementation of Address + +Address Address::make_array(ArrayAddress adr) { + AddressLiteral base = adr.base(); + Address index = adr.index(); + assert(index._disp == 0, "must not have disp"); // maybe it can? + Address array(index._base, index._index, index._scale, (intptr_t) base.target()); + array._rspec = base._rspec; + return array; +} + +// exceedingly dangerous constructor +Address::Address(address loc, RelocationHolder spec) { + _base = noreg; + _index = noreg; + _scale = no_scale; + _disp = (intptr_t) loc; + _rspec = spec; +} + + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of Assembler +const char *Assembler::ops_name[] = { + "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", + "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", + "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", + "daddi", "daddiu", "ldl", "ldr", "", "", "", "", + "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", + "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", + "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", + "sc", "swc1", "", "", "scd", "sdc1", "", "sd" +}; + +const char* Assembler::special_name[] = { + "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", + "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", + "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", + "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", + "add", "addu", "sub", "subu", "and", "or", "xor", "nor", + "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", + "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", + "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" +}; + +const char* Assembler::cop1_name[] = { + "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", + "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", + "c.sf", "c.ngle", 
"c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" +}; + +const char* Assembler::cop1x_name[] = { + "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", + "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", + "", "", "", "", "", "", "alnv.ps", "", + "", "", "", "", "", "", "", "", + "madd.s", "madd.d", "", "", "", "", "madd.ps", "", + "msub.s", "msub.d", "", "", "", "", "msub.ps", "", + "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", + "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" +}; + +const char* Assembler::special2_name[] = { + "madd", "", "mul", "", "msub", "", "", "", + "", "", "", "", "", "", "", "", + "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", + "", "", "", "", "gsmod", "gsdmod", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "" +}; + +const char* Assembler::special3_name[] = { + "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "bshfl", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", +}; + +const char* Assembler::regimm_name[] = { + "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", + "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", + "bltzal", "bgezal", "bltzall", "bgezall" +}; + +const char* Assembler::gs_ldc2_name[] = { + "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" +}; + + +const char* Assembler::gs_lwc2_name[] = { + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", + "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ + "gslq", "" +}; + +const char* Assembler::gs_sdc2_name[] = { + "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" +}; + +const char* Assembler::gs_swc2_name[] = { + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", + "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", + "gssq", "" +}; + +//misleading name, print only branch/jump instruction +void Assembler::print_instruction(int inst) { + const char *s; + switch( opcode(inst) ) { + default: + s = ops_name[opcode(inst)]; + break; + case special_op: + s = special_name[special(inst)]; + break; + case regimm_op: + s = special_name[rt(inst)]; + break; + } + + ::tty->print("%s", s); +} + +int Assembler::is_int_mask(int x) { + int xx = x; + int count = 0; + + while (x != 0) { + x &= (x - 1); + count++; + } + + if ((1<>2; + switch(opcode(inst)) { + case j_op: + case jal_op: + case lui_op: + case ori_op: + case daddiu_op: + ShouldNotReachHere(); + break; + default: + assert(is_simm16(v), "must be simm16"); +#ifndef PRODUCT + if(!is_simm16(v)) + { + tty->print_cr("must be simm16"); + tty->print_cr("Inst: %x", inst); + } +#endif + + v = low16(v); + inst &= 0xffff0000; + break; + } + + return inst | v; +} + +int Assembler::branch_destination(int inst, int pos) { + int off; + + switch(opcode(inst)) { + case j_op: + case jal_op: + assert(false, "should not use j/jal here"); + break; + default: + off = expand(low16(inst), 15); + break; + } + + return off ? 
pos + 4 + (off<<2) : 0; +} + +int AbstractAssembler::code_fill_byte() { + return 0x00; // illegal instruction 0x00000000 +} + +// Now the Assembler instruction (identical for 32/64 bits) + +void Assembler::lb(Register rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + lb(rt, src.base(), src.disp()); +} + +void Assembler::lbu(Register rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + lbu(rt, src.base(), src.disp()); +} + +void Assembler::ld(Register rt, Address dst){ + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (Assembler::is_simm16(disp)) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + gsldx(src, base, index, disp); + } else { + dsll(AT, index, scale); + gsldx(src, base, AT, disp); + } + } else { + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + ld(src, AT, disp); + } + } else { + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gsldx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + ld(src, AT, 0); + } + } else { + assert_different_registers(src, AT); + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(src, split_low(disp >> 16)); + if (split_low(disp)) ori(src, src, split_low(disp)); + if (UseLEXT1) { + gsldx(src, AT, src, 0); + } else { + daddu(AT, AT, src); + ld(src, AT, 0); + } + } + } + } else { + if (Assembler::is_simm16(disp)) { + ld(src, base, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gsldx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + ld(src, AT, 0); + } + } + } +} + +void Assembler::ldl(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ldl(rt, src.base(), src.disp()); +} + +void Assembler::ldr(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ldr(rt, src.base(), src.disp()); +} + +void Assembler::lh(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lh(rt, src.base(), src.disp()); +} + +void Assembler::lhu(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lhu(rt, src.base(), src.disp()); +} + +void Assembler::ll(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ll(rt, src.base(), src.disp()); +} + +void Assembler::lld(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lld(rt, src.base(), src.disp()); +} + +void Assembler::lw(Register rt, Address dst){ + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (Assembler::is_simm16(disp)) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + gslwx(src, base, index, disp); + } else { + dsll(AT, index, scale); + gslwx(src, base, AT, disp); + } + } else { + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + lw(src, AT, disp); + } + } else { + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gslwx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + lw(src, AT, 0); + } 
+ } else { + assert_different_registers(src, AT); + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(src, split_low(disp >> 16)); + if (split_low(disp)) ori(src, src, split_low(disp)); + if (UseLEXT1) { + gslwx(src, AT, src, 0); + } else { + daddu(AT, AT, src); + lw(src, AT, 0); + } + } + } + } else { + if (Assembler::is_simm16(disp)) { + lw(src, base, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gslwx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + lw(src, AT, 0); + } + } + } +} + +void Assembler::lea(Register rt, Address src) { + Register dst = rt; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index == noreg) { + if (is_simm16(disp)) { + daddiu(dst, base, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(dst, base, AT); + } + } else { + if (scale == 0) { + if (is_simm16(disp)) { + daddu(AT, base, index); + daddiu(dst, AT, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, base, AT); + daddu(dst, AT, index); + } + } else { + if (is_simm16(disp)) { + dsll(AT, index, scale); + daddu(AT, AT, base); + daddiu(dst, AT, disp); + } else { + assert_different_registers(dst, AT); + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + dsll(dst, index, scale); + daddu(dst, dst, AT); + } + } + } +} + +void Assembler::lwl(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lwl(rt, src.base(), src.disp()); +} + +void Assembler::lwr(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lwr(rt, src.base(), src.disp()); +} + +void Assembler::lwu(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lwu(rt, src.base(), src.disp()); +} + +void Assembler::sb(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sb(rt, dst.base(), dst.disp()); +} + +void Assembler::sc(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sc(rt, dst.base(), dst.disp()); +} + +void Assembler::scd(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + scd(rt, dst.base(), dst.disp()); +} + +void Assembler::sd(Register rt, Address dst) { + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (is_simm16(disp)) { + if ( UseLEXT1 && is_simm(disp, 8)) { + if (scale == 0) { + gssdx(src, base, index, disp); + } else { + assert_different_registers(rt, AT); + dsll(AT, index, scale); + gssdx(src, base, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + sd(src, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gssdx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + sd(src, AT, 0); + } + } else { + daddiu(SP, SP, -wordSize); + sd(T9, SP, 0); + + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(T9, split_low(disp >> 16)); + if (split_low(disp)) ori(T9, T9, split_low(disp)); + daddu(AT, AT, 
T9); + ld(T9, SP, 0); + daddiu(SP, SP, wordSize); + sd(src, AT, 0); + } + } + } else { + if (is_simm16(disp)) { + sd(src, base, disp); + } else { + assert_different_registers(rt, AT); + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gssdx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + sd(src, AT, 0); + } + } + } +} + +void Assembler::sdl(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sdl(rt, dst.base(), dst.disp()); +} + +void Assembler::sdr(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sdr(rt, dst.base(), dst.disp()); +} + +void Assembler::sh(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sh(rt, dst.base(), dst.disp()); +} + +void Assembler::sw(Register rt, Address dst) { + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + gsswx(src, base, index, disp); + } else { + assert_different_registers(rt, AT); + dsll(AT, index, scale); + gsswx(src, base, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + sw(src, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gsswx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + sw(src, AT, 0); + } + } else { + daddiu(SP, SP, -wordSize); + sd(T9, SP, 0); + + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(T9, split_low(disp >> 16)); + if (split_low(disp)) ori(T9, T9, split_low(disp)); + daddu(AT, AT, T9); + ld(T9, SP, 0); + daddiu(SP, SP, wordSize); + sw(src, AT, 0); + } + } + } else { + if (Assembler::is_simm16(disp)) { + sw(src, base, disp); + } else { + assert_different_registers(rt, AT); + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gsswx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + sw(src, AT, 0); + } + } + } +} + +void Assembler::swl(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + swl(rt, dst.base(), dst.disp()); +} + +void Assembler::swr(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + swr(rt, dst.base(), dst.disp()); +} + +void Assembler::lwc1(FloatRegister rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + lwc1(rt, src.base(), src.disp()); +} + +void Assembler::ldc1(FloatRegister rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + ldc1(rt, src.base(), src.disp()); +} + +void Assembler::swc1(FloatRegister rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + swc1(rt, dst.base(), dst.disp()); +} + +void Assembler::sdc1(FloatRegister rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sdc1(rt, dst.base(), dst.disp()); +} + +void Assembler::j(address entry) { + int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; + emit_long((j_op<<26) | dest); + has_delay_slot(); +} + +void Assembler::jal(address entry) { + int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; + emit_long((jal_op<<26) | dest); + 
has_delay_slot(); +} + +void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long + check_delay(); + AbstractAssembler::emit_int32(x); +} + +inline void Assembler::emit_data(int x) { emit_long(x); } +inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { + relocate(rtype); + emit_long(x); +} + +inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { + relocate(rspec); + emit_long(x); +} + +inline void Assembler::check_delay() { +#ifdef CHECK_DELAY + guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); + delay_state = no_delay; +#endif +} diff --git a/hotspot/src/cpu/mips/vm/assembler_mips.hpp b/hotspot/src/cpu/mips/vm/assembler_mips.hpp new file mode 100644 index 00000000000..e91b9db222b --- /dev/null +++ b/hotspot/src/cpu/mips/vm/assembler_mips.hpp @@ -0,0 +1,1789 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP + +#include "asm/register.hpp" + +class BiasedLockingCounters; + + +// Note: A register location is represented via a Register, not +// via an address for efficiency & simplicity reasons. + +class ArrayAddress; + +class Address VALUE_OBJ_CLASS_SPEC { + public: + enum ScaleFactor { + no_scale = -1, + times_1 = 0, + times_2 = 1, + times_4 = 2, + times_8 = 3, + times_ptr = times_8 + }; + static ScaleFactor times(int size) { + assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); + if (size == 8) return times_8; + if (size == 4) return times_4; + if (size == 2) return times_2; + return times_1; + } + + private: + Register _base; + Register _index; + ScaleFactor _scale; + int _disp; + RelocationHolder _rspec; + + // Easily misused constructors make them private + Address(address loc, RelocationHolder spec); + Address(int disp, address loc, relocInfo::relocType rtype); + Address(int disp, address loc, RelocationHolder spec); + + public: + + // creation + Address() + : _base(noreg), + _index(noreg), + _scale(no_scale), + _disp(0) { + } + + // No default displacement otherwise Register can be implicitly + // converted to 0(Register) which is quite a different animal. 
+ + Address(Register base, int disp = 0) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(disp) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, int disp = 0) + : _base (base), + _index(index), + _scale(scale), + _disp (disp) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } + + // The following two overloads are used in connection with the + // ByteSize type (see sizes.hpp). They simplify the use of + // ByteSize'd arguments in assembly code. Note that their equivalent + // for the optimized build are the member functions with int disp + // argument since ByteSize is mapped to an int type in that case. + // + // Note: DO NOT introduce similar overloaded functions for WordSize + // arguments as in the optimized mode, both ByteSize and WordSize + // are mapped to the same type and thus the compiler cannot make a + // distinction anymore (=> compiler errors). + +#ifdef ASSERT + Address(Register base, ByteSize disp) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(in_bytes(disp)) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, ByteSize disp) + : _base(base), + _index(index), + _scale(scale), + _disp(in_bytes(disp)) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } +#endif // ASSERT + + // accessors + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + int disp() const { return _disp; } + + static Address make_array(ArrayAddress); + + friend class Assembler; + friend class MacroAssembler; + friend class LIR_Assembler; // base/index/scale/disp +}; + +// Calling convention +class Argument VALUE_OBJ_CLASS_SPEC { + private: + int _number; + public: + enum { + n_register_parameters = 8, // 8 integer registers used to pass parameters + n_float_register_parameters = 8 // 8 float registers used to pass parameters + }; + + Argument(int number):_number(number){ } + Argument successor() {return Argument(number() + 1);} + + int number()const {return _number;} + bool is_Register()const {return _number < n_register_parameters;} + bool is_FloatRegister()const {return _number < n_float_register_parameters;} + + Register as_Register()const { + assert(is_Register(), "must be a register argument"); + return ::as_Register(RA0->encoding() + _number); + } + FloatRegister as_FloatRegister()const { + assert(is_FloatRegister(), "must be a float register argument"); + return ::as_FloatRegister(F12->encoding() + _number); + } + + Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} +}; + +// +// AddressLiteral has been split out from Address because operands of this type +// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out +// the few instructions that need to deal with address literals are unique and the +// MacroAssembler does not have to implement every instruction in the Assembler +// in order to search for address literals that may need special handling depending +// on the instruction and the platform. As small step on the way to merging i486/amd64 +// directories. 
+// +class AddressLiteral VALUE_OBJ_CLASS_SPEC { + friend class ArrayAddress; + RelocationHolder _rspec; + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of the item. + // We provide a special factory for making those lvals. + bool _is_lval; + + // If the target is far we'll need to load the ea of this to + // a register to reach it. Otherwise if near we can do rip + // relative addressing. + + address _target; + + protected: + // creation + AddressLiteral() + : _is_lval(false), + _target(NULL) + {} + + public: + + + AddressLiteral(address target, relocInfo::relocType rtype); + + AddressLiteral(address target, RelocationHolder const& rspec) + : _rspec(rspec), + _is_lval(false), + _target(target) + {} + // 32-bit complains about a multiple declaration for int*. + AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) + : _target((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral addr() { + AddressLiteral ret = *this; + ret._is_lval = true; + return ret; + } + + + private: + + address target() { return _target; } + bool is_lval() { return _is_lval; } + + relocInfo::relocType reloc() const { return _rspec.type(); } + const RelocationHolder& rspec() const { return _rspec; } + + friend class Assembler; + friend class MacroAssembler; + friend class Address; + friend class LIR_Assembler; + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); + case relocInfo::poll_type: + case relocInfo::poll_return_type: + return Relocation::spec_simple(rtype); + case relocInfo::none: + case relocInfo::oop_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. + return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + +}; + +// Convience classes +class RuntimeAddress: public AddressLiteral { + + public: + + RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} + +}; + +class OopAddress: public AddressLiteral { + + public: + + OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} + +}; + +class ExternalAddress: public AddressLiteral { + + public: + + ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} + +}; + +class InternalAddress: public AddressLiteral { + + public: + + InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} + +}; + +// x86 can do array addressing as a single operation since disp can be an absolute +// address amd64 can't. 
We create a class that expresses the concept but does extra +// magic on amd64 to get the final result + +class ArrayAddress VALUE_OBJ_CLASS_SPEC { + private: + + AddressLiteral _base; + Address _index; + + public: + + ArrayAddress() {}; + ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; + AddressLiteral base() { return _base; } + Address index() { return _index; } + +}; + +const int FPUStateSizeInWords = 512 / wordSize; + +// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction +// level ; i.e., what you write is what you get. The Assembler is generating code into +// a CodeBuffer. + +class Assembler : public AbstractAssembler { + friend class AbstractAssembler; // for the non-virtual hack + friend class LIR_Assembler; // as_Address() + friend class StubGenerator; + + public: + enum Condition { + zero , + notZero , + equal , + notEqual , + less , + lessEqual , + greater , + greaterEqual , + below , + belowEqual , + above , + aboveEqual + }; + + static const int LogInstructionSize = 2; + static const int InstructionSize = 1 << LogInstructionSize; + + // opcode, highest 6 bits: bits[31...26] + enum ops { + special_op = 0x00, // special_ops + regimm_op = 0x01, // regimm_ops + j_op = 0x02, + jal_op = 0x03, + beq_op = 0x04, + bne_op = 0x05, + blez_op = 0x06, + bgtz_op = 0x07, + addiu_op = 0x09, + slti_op = 0x0a, + sltiu_op = 0x0b, + andi_op = 0x0c, + ori_op = 0x0d, + xori_op = 0x0e, + lui_op = 0x0f, + cop0_op = 0x10, // cop0_ops + cop1_op = 0x11, // cop1_ops + gs_cop2_op = 0x12, // gs_cop2_ops + cop1x_op = 0x13, // cop1x_ops + beql_op = 0x14, + bnel_op = 0x15, + blezl_op = 0x16, + bgtzl_op = 0x17, + daddiu_op = 0x19, + ldl_op = 0x1a, + ldr_op = 0x1b, + special2_op = 0x1c, // special2_ops + msa_op = 0x1e, // msa_ops + special3_op = 0x1f, // special3_ops + lb_op = 0x20, + lh_op = 0x21, + lwl_op = 0x22, + lw_op = 0x23, + lbu_op = 0x24, + lhu_op = 0x25, + lwr_op = 0x26, + lwu_op = 0x27, + sb_op = 0x28, + sh_op = 0x29, + swl_op = 0x2a, + sw_op = 0x2b, + sdl_op = 0x2c, + sdr_op = 0x2d, + swr_op = 0x2e, + cache_op = 0x2f, + ll_op = 0x30, + lwc1_op = 0x31, + gs_lwc2_op = 0x32, //gs_lwc2_ops + pref_op = 0x33, + lld_op = 0x34, + ldc1_op = 0x35, + gs_ldc2_op = 0x36, //gs_ldc2_ops + ld_op = 0x37, + sc_op = 0x38, + swc1_op = 0x39, + gs_swc2_op = 0x3a, //gs_swc2_ops + scd_op = 0x3c, + sdc1_op = 0x3d, + gs_sdc2_op = 0x3e, //gs_sdc2_ops + sd_op = 0x3f + }; + + static const char *ops_name[]; + + //special family, the opcode is in low 6 bits. 
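+  // (the major opcode bits[31..26] are special_op == 0 and the low 6 bits hold
+  //  the function code; e.g. "daddu rd, rs, rt" is emitted by daddu() below via
+  //  insn_RRRO() with daddu_op as the function code)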
+ enum special_ops { + sll_op = 0x00, + movci_op = 0x01, + srl_op = 0x02, + sra_op = 0x03, + sllv_op = 0x04, + srlv_op = 0x06, + srav_op = 0x07, + jr_op = 0x08, + jalr_op = 0x09, + movz_op = 0x0a, + movn_op = 0x0b, + syscall_op = 0x0c, + break_op = 0x0d, + sync_op = 0x0f, + mfhi_op = 0x10, + mthi_op = 0x11, + mflo_op = 0x12, + mtlo_op = 0x13, + dsllv_op = 0x14, + dsrlv_op = 0x16, + dsrav_op = 0x17, + mult_op = 0x18, + multu_op = 0x19, + div_op = 0x1a, + divu_op = 0x1b, + dmult_op = 0x1c, + dmultu_op = 0x1d, + ddiv_op = 0x1e, + ddivu_op = 0x1f, + addu_op = 0x21, + subu_op = 0x23, + and_op = 0x24, + or_op = 0x25, + xor_op = 0x26, + nor_op = 0x27, + slt_op = 0x2a, + sltu_op = 0x2b, + daddu_op = 0x2d, + dsubu_op = 0x2f, + tge_op = 0x30, + tgeu_op = 0x31, + tlt_op = 0x32, + tltu_op = 0x33, + teq_op = 0x34, + tne_op = 0x36, + dsll_op = 0x38, + dsrl_op = 0x3a, + dsra_op = 0x3b, + dsll32_op = 0x3c, + dsrl32_op = 0x3e, + dsra32_op = 0x3f + }; + + static const char* special_name[]; + + //regimm family, the opcode is in rt[16...20], 5 bits + enum regimm_ops { + bltz_op = 0x00, + bgez_op = 0x01, + bltzl_op = 0x02, + bgezl_op = 0x03, + tgei_op = 0x08, + tgeiu_op = 0x09, + tlti_op = 0x0a, + tltiu_op = 0x0b, + teqi_op = 0x0c, + tnei_op = 0x0e, + bltzal_op = 0x10, + bgezal_op = 0x11, + bltzall_op = 0x12, + bgezall_op = 0x13, + bposge32_op = 0x1c, + bposge64_op = 0x1d, + synci_op = 0x1f, + }; + + static const char* regimm_name[]; + + //cop0 family, the ops is in bits[25...21], 5 bits + enum cop0_ops { + mfc0_op = 0x00, + dmfc0_op = 0x01, + // + mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 + mtc0_op = 0x04, + dmtc0_op = 0x05, + rdpgpr_op = 0x0a, + inter_op = 0x0b, + wrpgpr_op = 0x0c + }; + + //cop1 family, the ops is in bits[25...21], 5 bits + enum cop1_ops { + mfc1_op = 0x00, + dmfc1_op = 0x01, + cfc1_op = 0x02, + mfhc1_op = 0x03, + mtc1_op = 0x04, + dmtc1_op = 0x05, + ctc1_op = 0x06, + mthc1_op = 0x07, + bc1f_op = 0x08, + single_fmt = 0x10, + double_fmt = 0x11, + word_fmt = 0x14, + long_fmt = 0x15, + ps_fmt = 0x16 + }; + + + //2 bist (bits[17...16]) of bc1x instructions (cop1) + enum bc_ops { + bcf_op = 0x0, + bct_op = 0x1, + bcfl_op = 0x2, + bctl_op = 0x3, + }; + + // low 6 bits of c_x_fmt instructions (cop1) + enum c_conds { + f_cond = 0x30, + un_cond = 0x31, + eq_cond = 0x32, + ueq_cond = 0x33, + olt_cond = 0x34, + ult_cond = 0x35, + ole_cond = 0x36, + ule_cond = 0x37, + sf_cond = 0x38, + ngle_cond = 0x39, + seq_cond = 0x3a, + ngl_cond = 0x3b, + lt_cond = 0x3c, + nge_cond = 0x3d, + le_cond = 0x3e, + ngt_cond = 0x3f + }; + + // low 6 bits of cop1 instructions + enum float_ops { + fadd_op = 0x00, + fsub_op = 0x01, + fmul_op = 0x02, + fdiv_op = 0x03, + fsqrt_op = 0x04, + fabs_op = 0x05, + fmov_op = 0x06, + fneg_op = 0x07, + froundl_op = 0x08, + ftruncl_op = 0x09, + fceill_op = 0x0a, + ffloorl_op = 0x0b, + froundw_op = 0x0c, + ftruncw_op = 0x0d, + fceilw_op = 0x0e, + ffloorw_op = 0x0f, + movf_f_op = 0x11, + movt_f_op = 0x11, + movz_f_op = 0x12, + movn_f_op = 0x13, + frecip_op = 0x15, + frsqrt_op = 0x16, + fcvts_op = 0x20, + fcvtd_op = 0x21, + fcvtw_op = 0x24, + fcvtl_op = 0x25, + fcvtps_op = 0x26, + fcvtspl_op = 0x28, + fpll_op = 0x2c, + fplu_op = 0x2d, + fpul_op = 0x2e, + fpuu_op = 0x2f + }; + + static const char* cop1_name[]; + + //cop1x family, the opcode is in low 6 bits. 
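+  // (COP1X holds the indexed FP loads/stores such as lwxc1/ldxc1 and the fused
+  //  multiply-add/subtract forms such as madd_s/madd_d; their fields are packed
+  //  by insn_F3ROX() below)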
+ enum cop1x_ops { + lwxc1_op = 0x00, + ldxc1_op = 0x01, + luxc1_op = 0x05, + swxc1_op = 0x08, + sdxc1_op = 0x09, + suxc1_op = 0x0d, + prefx_op = 0x0f, + + alnv_ps_op = 0x1e, + madd_s_op = 0x20, + madd_d_op = 0x21, + madd_ps_op = 0x26, + msub_s_op = 0x28, + msub_d_op = 0x29, + msub_ps_op = 0x2e, + nmadd_s_op = 0x30, + nmadd_d_op = 0x31, + nmadd_ps_op = 0x36, + nmsub_s_op = 0x38, + nmsub_d_op = 0x39, + nmsub_ps_op = 0x3e + }; + + static const char* cop1x_name[]; + + //special2 family, the opcode is in low 6 bits. + enum special2_ops { + madd_op = 0x00, + maddu_op = 0x01, + mul_op = 0x02, + gs0x03_op = 0x03, + msub_op = 0x04, + msubu_op = 0x05, + gs0x06_op = 0x06, + gsemul2_op = 0x07, + gsemul3_op = 0x08, + gsemul4_op = 0x09, + gsemul5_op = 0x0a, + gsemul6_op = 0x0b, + gsemul7_op = 0x0c, + gsemul8_op = 0x0d, + gsemul9_op = 0x0e, + gsemul10_op = 0x0f, + gsmult_op = 0x10, + gsdmult_op = 0x11, + gsmultu_op = 0x12, + gsdmultu_op = 0x13, + gsdiv_op = 0x14, + gsddiv_op = 0x15, + gsdivu_op = 0x16, + gsddivu_op = 0x17, + gsmod_op = 0x1c, + gsdmod_op = 0x1d, + gsmodu_op = 0x1e, + gsdmodu_op = 0x1f, + clz_op = 0x20, + clo_op = 0x21, + xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX + gsrxr_x_op = 0x23, //gsX + dclz_op = 0x24, + dclo_op = 0x25, + gsle_op = 0x26, + gsgt_op = 0x27, + gs86j_op = 0x28, + gsloop_op = 0x29, + gsaj_op = 0x2a, + gsldpc_op = 0x2b, + gs86set_op = 0x30, + gstm_op = 0x31, + gscvt_ld_op = 0x32, + gscvt_ud_op = 0x33, + gseflag_op = 0x34, + gscam_op = 0x35, + gstop_op = 0x36, + gssettag_op = 0x37, + gssdbbp_op = 0x38 + }; + + static const char* special2_name[]; + + // special3 family, the opcode is in low 6 bits. + enum special3_ops { + ext_op = 0x00, + dextm_op = 0x01, + dextu_op = 0x02, + dext_op = 0x03, + ins_op = 0x04, + dinsm_op = 0x05, + dinsu_op = 0x06, + dins_op = 0x07, + lxx_op = 0x0a, //lwx, lhx, lbux, ldx + insv_op = 0x0c, + dinsv_op = 0x0d, + ar1_op = 0x10, //MIPS DSP + cmp1_op = 0x11, //MIPS DSP + re1_op = 0x12, //MIPS DSP, re1_ops + sh1_op = 0x13, //MIPS DSP + ar2_op = 0x14, //MIPS DSP + cmp2_op = 0x15, //MIPS DSP + re2_op = 0x16, //MIPS DSP, re2_ops + sh2_op = 0x17, //MIPS DSP + ar3_op = 0x18, //MIPS DSP + bshfl_op = 0x20 //seb, seh + }; + + // re1_ops + enum re1_ops { + absq_s_qb_op = 0x01, + repl_qb_op = 0x02, + replv_qb_op = 0x03, + absq_s_ph_op = 0x09, + repl_ph_op = 0x0a, + replv_ph_op = 0x0b, + absq_s_w_op = 0x11, + bitrev_op = 0x1b + }; + + // re2_ops + enum re2_ops { + repl_ob_op = 0x02, + replv_ob_op = 0x03, + absq_s_qh_op = 0x09, + repl_qh_op = 0x0a, + replv_qh_op = 0x0b, + absq_s_pw_op = 0x11, + repl_pw_op = 0x12, + replv_pw_op = 0x13, + }; + + static const char* special3_name[]; + + // lwc2/gs_lwc2 family, the opcode is in low 6 bits. + enum gs_lwc2_ops { + gslble_op = 0x10, + gslbgt_op = 0x11, + gslhle_op = 0x12, + gslhgt_op = 0x13, + gslwle_op = 0x14, + gslwgt_op = 0x15, + gsldle_op = 0x16, + gsldgt_op = 0x17, + gslwlec1_op = 0x1c, + gslwgtc1_op = 0x1d, + gsldlec1_op = 0x1e, + gsldgtc1_op = 0x1f, + gslq_op = 0x20 + }; + + static const char* gs_lwc2_name[]; + + // ldc2/gs_ldc2 family, the opcode is in low 3 bits. + enum gs_ldc2_ops { + gslbx_op = 0x0, + gslhx_op = 0x1, + gslwx_op = 0x2, + gsldx_op = 0x3, + gslwxc1_op = 0x6, + gsldxc1_op = 0x7 + }; + + static const char* gs_ldc2_name[]; + + // swc2/gs_swc2 family, the opcode is in low 6 bits. 
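+  // (these store forms mirror the gs_lwc2 load forms above, reusing the same
+  //  low-6-bit function codes for the store direction)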
+ enum gs_swc2_ops { + gssble_op = 0x10, + gssbgt_op = 0x11, + gsshle_op = 0x12, + gsshgt_op = 0x13, + gsswle_op = 0x14, + gsswgt_op = 0x15, + gssdle_op = 0x16, + gssdgt_op = 0x17, + gsswlec1_op = 0x1c, + gsswgtc1_op = 0x1d, + gssdlec1_op = 0x1e, + gssdgtc1_op = 0x1f, + gssq_op = 0x20 + }; + + static const char* gs_swc2_name[]; + + // sdc2/gs_sdc2 family, the opcode is in low 3 bits. + enum gs_sdc2_ops { + gssbx_op = 0x0, + gsshx_op = 0x1, + gsswx_op = 0x2, + gssdx_op = 0x3, + gsswxc1_op = 0x6, + gssdxc1_op = 0x7 + }; + + static const char* gs_sdc2_name[]; + + enum WhichOperand { + // input to locate_operand, and format code for relocations + imm_operand = 0, // embedded 32-bit|64-bit immediate operand + disp32_operand = 1, // embedded 32-bit displacement or address + call32_operand = 2, // embedded 32-bit self-relative displacement + narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop + _WhichOperand_limit = 4 + }; + + static int opcode(int insn) { return (insn>>26)&0x3f; } + static int rs(int insn) { return (insn>>21)&0x1f; } + static int rt(int insn) { return (insn>>16)&0x1f; } + static int rd(int insn) { return (insn>>11)&0x1f; } + static int sa(int insn) { return (insn>>6)&0x1f; } + static int special(int insn) { return insn&0x3f; } + static int imm_off(int insn) { return (short)low16(insn); } + + static int low (int x, int l) { return bitfield(x, 0, l); } + static int low16(int x) { return low(x, 16); } + static int low26(int x) { return low(x, 26); } + + protected: + //help methods for instruction ejection + + // I-Type (Immediate) + // 31 26 25 21 20 16 15 0 + //| opcode | rs | rt | immediat | + //| | | | | + // 6 5 5 16 + static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } + + // R-Type (Register) + // 31 26 25 21 20 16 15 11 10 6 5 0 + //| special | rs | rt | rd | 0 | opcode | + //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | + // 6 5 5 5 5 6 + static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } + static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } + static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } + + static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } + static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } + + static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { + return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; + } + static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { + return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; + } + + static int high (int x, int l) { return bitfield(x, 32-l, l); } + static int high16(int x) { return high(x, 16); } + static int high6 (int x) { return high(x, 6); } + + //get the offset field of jump/branch instruction + int offset(address entry) { + assert(is_simm16((entry - pc() - 4) / 4), "change this code"); + if (!is_simm16((entry - pc() - 4) / 4)) { + tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); + } + return (entry - pc() - 4) / 4; + } + + +public: + using AbstractAssembler::offset; + + //sign expand with the sign bit is h + static int expand(int x, int h) { return -(x & (1<> 16; + } + + static int split_high(int x) { + return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; + } + + static int merge(int low, int high) { + return expand(low, 15) + (high<<16); + } + + static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { + return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; + } + + // Test if x is within signed immediate range for nbits. + static bool is_simm (int x, int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int min = -( ((int)1) << nbits-1 ); + const int maxplus1 = ( ((int)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + static bool is_simm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong min = -( ((jlong)1) << nbits-1 ); + const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + // Test if x is within unsigned immediate range for nbits + static bool is_uimm(int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int maxplus1 = ( ((int)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + static bool is_uimm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong maxplus1 = ( ((jlong)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + static bool is_simm16(int x) { return is_simm(x, 16); } + static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } + + static bool fit_in_jal(address target, address pc) { + intptr_t mask = 0xfffffffff0000000; + return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); + } + + bool fit_int_branch(address entry) { + return is_simm16(offset(entry)); + } + +protected: +#ifdef ASSERT + #define CHECK_DELAY +#endif +#ifdef CHECK_DELAY + enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; +#endif + +public: + void assert_not_delayed() { +#ifdef CHECK_DELAY + assert_not_delayed("next instruction should not be a delay slot"); +#endif + } + + void assert_not_delayed(const char* msg) { +#ifdef CHECK_DELAY + assert(delay_state == no_delay, msg); +#endif + } + +protected: + // Delay slot helpers + // cti is called when emitting control-transfer instruction, + // BEFORE doing the emitting. + // Only effective when assertion-checking is enabled. + + // called when emitting cti with a delay slot, AFTER emitting + void has_delay_slot() { +#ifdef CHECK_DELAY + assert_not_delayed("just checking"); + delay_state = at_delay_slot; +#endif + } + +public: + Assembler* delayed() { +#ifdef CHECK_DELAY + guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); + delay_state = filling_delay_slot; +#endif + return this; + } + + void flush() { +#ifdef CHECK_DELAY + guarantee( delay_state == no_delay, "ending code with a delay slot"); +#endif + AbstractAssembler::flush(); + } + + void emit_long(int); // shadows AbstractAssembler::emit_long + void emit_data(int); + void emit_data(int, RelocationHolder const&); + void emit_data(int, relocInfo::relocType rtype); + void check_delay(); + + + // Generic instructions + // Does 32bit or 64bit as needed for the platform. 
In some sense these + // belong in macro assembler but there is no need for both varieties to exist + + void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } + void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} + void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } + + void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } + void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } + + void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } + void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } + void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } + void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } + void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } + void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } + void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } + void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } + void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + // two versions of brk: + // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set + // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) + // both versions work + void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } + void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } + + void beq (Register rs, Register rt, address entry) { beq(rs, rt, offset(entry)); } + void beql (Register 
rs, Register rt, address entry) { beql(rs, rt, offset(entry));} + void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } + void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } + void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } + void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } + void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } + void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } + void blez (Register rs, address entry) { blez (rs, offset(entry)); } + void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } + void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } + void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } + void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } + void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } + void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } + void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } + + void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } + void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } + void bgez (Register rs, Label& L){ bgez (rs, target(L)); } + void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } + void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } + void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } + void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } + void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } + void blez (Register rs, Label& L){ blez (rs, target(L)); } + void blezl (Register rs, Label& L){ blezl (rs, target(L)); } + void bltz (Register rs, Label& L){ bltz (rs, target(L)); } + void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } + void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } + void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } + void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } + void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } + + void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } + void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } + void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } + + void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } + void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } + + void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } + void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } + + enum bshfl_ops { + seb_op = 0x10, + seh_op = 0x18 + }; + void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op << 6) | bshfl_op); } + void seh (Register rd, Register rt) { emit_long((special3_op << 
26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } + + void ext (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); + guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); + + int lsb = pos; + int msbd = size - 1; + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); + } + + void dext (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); + guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); + + int lsb = pos; + int msbd = size - 1; + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); + } + + void dextm (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); + guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); + + int lsb = pos; + int msbd = size - 1 - 32; + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); + } + + void rotr (Register rd, Register rt, int sa) { + emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); + } + + void drotr (Register rd, Register rt, int sa) { + emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); + } + + void drotr32 (Register rd, Register rt, int sa) { + emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); + } + + void rotrv (Register rd, Register rt, Register rs) { + emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); + } + + void drotrv (Register rd, Register rt, Register rs) { + emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); + } + + void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } + void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } + void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } + void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } + void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } + void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } + void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } + void dsra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } + void dsrav (Register rd, 
Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } + void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } + void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } + void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } + void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } + void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } + + void b(int off) { beq(R0, R0, off); } + void b(address entry) { b(offset(entry)); } + void b(Label& L) { b(target(L)); } + + void j(address entry); + void jal(address entry); + + void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } + void jalr(Register rs) { jalr(RA, rs); } + void jalr() { jalr(RT9); } + + void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } + void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } + + void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } + void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } + + void lb (Register rt, Address src); + void lbu(Register rt, Address src); + void ld (Register rt, Address src); + void ldl(Register rt, Address src); + void ldr(Register rt, Address src); + void lh (Register rt, Address src); + void lhu(Register rt, Address src); + void ll (Register rt, Address src); + void 
lld(Register rt, Address src); + void lw (Register rt, Address src); + void lwl(Register rt, Address src); + void lwr(Register rt, Address src); + void lwu(Register rt, Address src); + void lea(Register rt, Address src); + void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } + + void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } + void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } + void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } + void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } + + void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } + void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } + + void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } + + void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } + void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } + + void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } + void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } + void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } + void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } + void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } + void srav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } + void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } + void srlv (Register 
rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } + + void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } + void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } + void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } + void sync () { + if (os::is_ActiveCoresMP()) + emit_long(0); + else + emit_long(sync_op); + } + void syscall(int code) { emit_long( (code<<6) | syscall_op ); } + + void sb(Register rt, Address dst); + void sc(Register rt, Address dst); + void scd(Register rt, Address dst); + void sd(Register rt, Address dst); + void sdl(Register rt, Address dst); + void sdr(Register rt, Address dst); + void sh(Register rt, Address dst); + void sw(Register rt, Address dst); + void swl(Register rt, Address dst); + void swr(Register rt, Address dst); + + void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } + void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } + void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } + void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } + void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } + void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } + void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } + void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } + void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } + void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } + void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } + void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } + + void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } + void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } + + void nop() { emit_long(0); } + + + + void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void ldc1(FloatRegister ft, Address src); + void lwc1(FloatRegister ft, Address src); + + //COP0 + void mfc0 
(Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } + void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } + // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet + void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } + void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } + //COP0 end + + + //COP1 + void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } + void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } + void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } + void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } + + void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } + void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } + void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } + void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } + + void bc1f (address entry) { bc1f(offset(entry)); } + void bc1fl(address entry) { bc1fl(offset(entry)); } + void bc1t (address entry) { bc1t(offset(entry)); } + void bc1tl(address entry) { bc1tl(offset(entry)); } + + void bc1f (Label& L) { bc1f(target(L)); } + void bc1fl(Label& L) { bc1fl(target(L)); } + void bc1t (Label& L) { bc1t(target(L)); } + void bc1tl(Label& L) { bc1tl(target(L)); } + +//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
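+// INSN_SINGLE(ft, fs, fd, op) expands to insn_F3RO(single_fmt, ft, fs, fd, op),
+// i.e. (cop1_op<<26)|(single_fmt<<21)|(ft<<16)|(fs<<11)|(fd<<6)|op; so add_s()
+// below emits the single-precision add with fadd_op as the function code.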
+#define INSN_SINGLE(r1, r2, r3, op) \ + { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} + void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} + void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} + void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} + void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} + void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} + void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} + void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} + void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} + void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} + void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} + void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} + void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} + void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} + void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} + void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} + //null + void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} + void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} + //null + void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} + void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} + //null + void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} + //null + void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} + void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} + void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} + //null + void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} + void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} + void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} + void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} + void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} + void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} + void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} + void c_ule_s (FloatRegister fs, 
FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} + void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} + void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} + void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} + void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} + void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} + void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} + void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} + void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} + +#undef INSN_SINGLE + + +//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. +#define INSN_DOUBLE(r1, r2, r3, op) \ + { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + + void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} + void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} + void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} + void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} + void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} + void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} + void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} + void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} + void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} + void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} + void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} + void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} + void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} + void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} + void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} + void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} + //null + void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} + void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} + //null + void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} + void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} + //null + void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} + void cvt_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} + //null + void 
cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} + //null + void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} + void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} + void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} + void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} + void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} + void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} + void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} + void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} + void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} + void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} + void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} + void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} + void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} + void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} + void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} + void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} + +#undef INSN_DOUBLE + + + //null + void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } + void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } + //null + void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } + void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } + //null + + +//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
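+// INSN_PS uses the same field packing as INSN_SINGLE/INSN_DOUBLE but with
+// ps_fmt, the MIPS paired-single format (two 32-bit floats in one 64-bit FPR).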
+#define INSN_PS(r1, r2, r3, op) \ + { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + + void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} + void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} + void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} + //null + void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} + void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} + void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} + //null + //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} + //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } + void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} + void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} + //null + void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} + //null + void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} + //null + void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} + void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} + void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} + void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} + void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} + void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} + void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} + void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} + void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} + void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} + void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} + void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} + void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} + void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} + void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} + void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} + void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} + void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} + void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} + void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} + //null +#undef INSN_PS + //COP1 end + + + //COP1X +//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
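+// INSN_COP1X passes fr in the slot insn_F3ROX() calls fmt, matching the MIPS-IV
+// COP1X layout; the resulting madd/msub/nmadd/nmsub forms compute
+// fd = fs * ft +/- fr (negated results for the nm* variants).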
+#define INSN_COP1X(r0, r1, r2, r3, op) \ + { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } + void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } + void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } + void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } + void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } + void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } + void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } + void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } + void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } + void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } + void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } + void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } +#undef INSN_COP1X + //COP1X end + + //SPECIAL2 +//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
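+// INSN_S2 packs rs/rt/rd around a SPECIAL2 function code; besides the standard
+// mul/madd/msub/clz/clo entries, this family carries the Loongson-specific gs*
+// multiply, divide and mod variants listed in special2_ops above.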
+#define INSN_S2(op) \ + { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} + + void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } + void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } + void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } + void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } + void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } + void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } + void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } + + void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } + void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } + void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } + void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} + void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } + void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } + void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } + void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } + void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } + void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } + void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } + void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } + void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } + void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } + void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } + void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } + void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } + void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } + void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } + void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } + +#undef INSN_S2 + + //SPECIAL3 +/* +// FIXME +#define is_0_to_32(a, b) \ + assert (a >= 0, " just a check"); \ + assert (a <= 0, " just a check"); \ + assert (b >= 0, " just a check"); \ + assert (b <= 0, 
" just a check"); \ + assert (a+b >= 0, " just a check"); \ + assert (a+b <= 0, " just a check"); + */ +#define is_0_to_32(a, b) + + void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } + void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } + void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } + void dins (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); + guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); + } + + void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } + void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } + void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } + void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } + + void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } + void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } + void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } + void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } + void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } + void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } + + void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void sdc1(FloatRegister ft, Address dst); + void swc1(FloatRegister ft, Register 
base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void swc1(FloatRegister ft, Address dst); + + + static void print_instruction(int); + int patched_branch(int dest_pos, int inst, int inst_pos); + int branch_destination(int inst, int pos); + + // Loongson extension + + // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". + void gslble(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); + } + + void gslbgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); + } + + void gslhle(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); + } + + void gslhgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); + } + + void gslwle(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); + } + + void gslwgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); + } + + void gsldle(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); + } + + void gsldgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); + } + + void gslwlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); + } + + void gslwgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); + } + + void gsldlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); + } + + void gsldgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); + } + + void gslq(Register rq, Register rt, Register base, int off) { + assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); + off = off >> 4; + assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); + } + + void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { + assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 
0"); + off = off >> 4; + assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); + } + + void gssble(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); + } + + void gssbgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); + } + + void gsshle(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); + } + + void gsshgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); + } + + void gsswle(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); + } + + void gsswgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); + } + + void gssdle(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); + } + + void gssdgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); + } + + void gsswlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); + } + + void gsswgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); + } + + void gssdlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); + } + + void gssdgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); + } + + void gssq(Register rq, Register rt, Register base, int off) { + assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); + off = off >> 4; + assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); + } + + void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { + assert(!(off & 0xF), "gssqc1: the low 4 bits of off must be 0"); + off = off >> 4; + assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | 
(low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); + } + + //LDC2 & SDC2 +#define INSN(OPS, OP) \ + assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ + assert(UseLEXT1, "check UseLEXT1"); \ + emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ + ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); + +#define INSN_LDC2(NAME, op) \ + void NAME(Register rt, Register base, Register index, int off) { \ + INSN(gs_ldc2_op, op) \ + } + +#define INSN_LDC2_F(NAME, op) \ + void NAME(FloatRegister rt, Register base, Register index, int off) { \ + INSN(gs_ldc2_op, op) \ + } + +#define INSN_SDC2(NAME, op) \ + void NAME(Register rt, Register base, Register index, int off) { \ + INSN(gs_sdc2_op, op) \ + } + +#define INSN_SDC2_F(NAME, op) \ + void NAME(FloatRegister rt, Register base, Register index, int off) { \ + INSN(gs_sdc2_op, op) \ + } + +/* + void gslbx(Register rt, Register base, Register index, int off) { + assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); + assert(UseLEXT1, "check UseLEXT1"); + emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | + ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); + void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} + + INSN_LDC2(gslbx, gslbx_op) + INSN_LDC2(gslhx, gslhx_op) + INSN_LDC2(gslwx, gslwx_op) + INSN_LDC2(gsldx, gsldx_op) + INSN_LDC2_F(gslwxc1, gslwxc1_op) + INSN_LDC2_F(gsldxc1, gsldxc1_op) + + INSN_SDC2(gssbx, gssbx_op) + INSN_SDC2(gsshx, gsshx_op) + INSN_SDC2(gsswx, gsswx_op) + INSN_SDC2(gssdx, gssdx_op) + INSN_SDC2_F(gsswxc1, gsswxc1_op) + INSN_SDC2_F(gssdxc1, gssdxc1_op) +*/ + void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } + void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } + void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } + void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } + void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } + void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } + + void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } + void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } + void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } + void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } + void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } + void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } + +#undef INSN +#undef INSN_LDC2 +#undef INSN_LDC2_F +#undef INSN_SDC2 +#undef INSN_SDC2_F + + // cpucfg on Loongson CPUs above 3A4000 + void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} + + +public: + // Creation + Assembler(CodeBuffer* code) : AbstractAssembler(code) { +#ifdef CHECK_DELAY + delay_state = no_delay; +#endif + } + + // Decoding + static address locate_operand(address inst, WhichOperand which); + static address locate_next_instruction(address inst); +}; + + + +#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP diff --git 
a/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp b/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp new file mode 100644 index 00000000000..39aeb5509a7 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp new file mode 100644 index 00000000000..a4a1b28c2d1 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "interpreter/bytecodeInterpreter.hpp" +#include "interpreter/bytecodeInterpreter.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef TARGET_ARCH_MODEL_mips_32 +# include "interp_masm_mips_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_mips_64 +# include "interp_masm_mips_64.hpp" +#endif + +#ifdef CC_INTERP + +#endif // CC_INTERP (all) diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp new file mode 100644 index 00000000000..aac8b7a2b7f --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP +#define CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP + +// Platform specific for C++ based Interpreter +#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */ + +private: + + // save the bottom of the stack after frame manager setup. For ease of restoration after return + // from recursive interpreter call + intptr_t* _frame_bottom; /* saved bottom of frame manager frame */ + intptr_t* _last_Java_pc; /* pc to return to in frame manager */ + intptr_t* _sender_sp; /* sender's sp before stack (locals) extension */ + interpreterState _self_link; /* Previous interpreter state */ /* sometimes points to self??? */ + double _native_fresult; /* save result of native calls that might return floats */ + intptr_t _native_lresult; /* save result of native calls that might return handle/longs */ +public: + + static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp); + inline intptr_t* sender_sp() { + return _sender_sp; + } + + +#define SET_LAST_JAVA_FRAME() + +#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set_flags(0); + +/* + * Macros for accessing the stack. 
+ */ +#undef STACK_INT +#undef STACK_FLOAT +#undef STACK_ADDR +#undef STACK_OBJECT +#undef STACK_DOUBLE +#undef STACK_LONG + +// JavaStack Implementation + +#define GET_STACK_SLOT(offset) (*((intptr_t*) &topOfStack[-(offset)])) +#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)]) +#define STACK_ADDR(offset) (*((address *) &topOfStack[-(offset)])) +#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)])) +#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)])) +#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)])) +#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d) +#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l) + +#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value)) +#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value)) +#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value)) +#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \ + ((VMJavaVal64*)(addr))->l) +// JavaLocals implementation + +#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)]) +#define LOCALS_ADDR(offset) ((address)locals[-(offset)]) +#define LOCALS_INT(offset) (*((jint*)&locals[-(offset)])) +#define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)])) +#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)]) +#define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d) +#define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l) +#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)])) +#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)])) + +#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value)) +#define SET_LOCALS_ADDR(value, offset) (*((address *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_FLOAT(value, offset) (*((jfloat *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_OBJECT(value, offset) (*((oop *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value)) +#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value)) +#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \ + ((VMJavaVal64*)(addr))->l) + +#endif // CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp new file mode 100644 index 00000000000..8ce77ab92ff --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP +#define CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP + +// Inline interpreter functions for MIPS + +inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } +inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } +inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } +inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } +inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); } + +inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } + +inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? direction : 0); + +} + +inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { + // x86 can do unaligned copies but not 64bits at a time + to[0] = from[0]; to[1] = from[1]; +} + +// The long operations depend on compiler support for "long long" on x86 + +inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { + return op1 + op2; +} + +inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { + return op1 & op2; +} + +inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { + // QQQ what about check and throw... + return op1 / op2; +} + +inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { + return op1 * op2; +} + +inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { + return op1 | op2; +} + +inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { + return op1 - op2; +} + +inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { + return op1 ^ op2; +} + +inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { + return op1 % op2; +} + +inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { + // CVM did this 0x3f mask, is the really needed??? 
QQQ + return ((unsigned long long) op1) >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { + return op1 >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { + return op1 << (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { + return -op; +} + +inline jlong BytecodeInterpreter::VMlongNot(jlong op) { + return ~op; +} + +inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { + return (op <= 0); +} + +inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { + return (op >= 0); +} + +inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { + return (op == 0); +} + +inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { + return (op1 == op2); +} + +inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { + return (op1 != op2); +} + +inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { + return (op1 >= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { + return (op1 <= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { + return (op1 < op2); +} + +inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { + return (op1 > op2); +} + +inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { + return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); +} + +// Long conversions + +inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { + return (jfloat) val; +} + +inline jint BytecodeInterpreter::VMlong2Int(jlong val) { + return (jint) val; +} + +// Double Arithmetic + +inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { + return op1 + op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { + // Divide by zero... QQQ + return op1 / op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { + return op1 * op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { + return -op; +} + +inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { + return fmod(op1, op2); +} + +inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { + return op1 - op2; +} + +inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? 
direction : 0); +} + +// Double Conversions + +inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { + return (jfloat) val; +} + +// Float Conversions + +inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { + return (jdouble) op; +} + +// Integer Arithmetic + +inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { + return op1 + op2; +} + +inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { + return op1 & op2; +} + +inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { + // it's possible we could catch this special case implicitly + if ((juint)op1 == 0x80000000 && op2 == -1) return op1; + else return op1 / op2; +} + +inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { + return op1 * op2; +} + +inline jint BytecodeInterpreter::VMintNeg(jint op) { + return -op; +} + +inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { + return op1 | op2; +} + +inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { + // it's possible we could catch this special case implicitly + if ((juint)op1 == 0x80000000 && op2 == -1) return 0; + else return op1 % op2; +} + +inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { + return op1 << op2; +} + +inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { + return op1 >> (op2 & 0x1f); // QQ op2 & 0x1f?? +} + +inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { + return op1 - op2; +} + +inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { + return ((juint) op1) >> (op2 & 0x1f); // QQ op2 & 0x1f?? +} + +inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { + return op1 ^ op2; +} + +inline jdouble BytecodeInterpreter::VMint2Double(jint val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMint2Float(jint val) { + return (jfloat) val; +} + +inline jlong BytecodeInterpreter::VMint2Long(jint val) { + return (jlong) val; +} + +inline jchar BytecodeInterpreter::VMint2Char(jint val) { + return (jchar) val; +} + +inline jshort BytecodeInterpreter::VMint2Short(jint val) { + return (jshort) val; +} + +inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { + return (jbyte) val; +} + +#endif // CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP diff --git a/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp b/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp new file mode 100644 index 00000000000..61efd1f5611 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/bytecodes.hpp" + + +void Bytecodes::pd_initialize() { + // No mips specific initialization +} + + +Bytecodes::Code Bytecodes::pd_base_code_for(Code code) { + // No mips specific bytecodes + return code; +} diff --git a/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp b/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp new file mode 100644 index 00000000000..25a9562acd5 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_BYTECODES_MIPS_HPP +#define CPU_MIPS_VM_BYTECODES_MIPS_HPP + +// No Loongson specific bytecodes + +#endif // CPU_MIPS_VM_BYTECODES_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/bytes_mips.hpp b/hotspot/src/cpu/mips/vm/bytes_mips.hpp new file mode 100644 index 00000000000..515ffad4b07 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytes_mips.hpp @@ -0,0 +1,193 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP +#define CPU_MIPS_VM_BYTES_MIPS_HPP + +#include "memory/allocation.hpp" + +class Bytes: AllStatic { + public: + // Returns true if the byte ordering used by Java is different from the native byte ordering + // of the underlying machine. For example, this is true for Intel x86, but false for Solaris + // on Sparc. + // we use mipsel, so return true + static inline bool is_Java_byte_ordering_different(){ return true; } + + + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering + // (no special code is needed since x86 CPUs can access unaligned data) + static inline u2 get_native_u2(address p) { + if ((intptr_t)p & 0x1) { + return ((u2)p[1] << 8) | (u2)p[0]; + } else { + return *(u2*)p; + } + } + + static inline u4 get_native_u4(address p) { + if ((intptr_t)p & 3) { + u4 res; + __asm__ __volatile__ ( + " .set push\n" + " .set mips64\n" + " .set noreorder\n" + + " lwr %[res], 0(%[addr]) \n" + " lwl %[res], 3(%[addr]) \n" + + " .set pop" + : [res] "=&r" (res) + : [addr] "r" (p) + : "memory" + ); + return res; + } else { + return *(u4*)p; + } + } + + static inline u8 get_native_u8(address p) { + u8 res; + u8 temp; + // u4 tp;//tmp register + __asm__ __volatile__ ( + " .set push\n" + " .set mips64\n" + " .set noreorder\n" + " .set noat\n" + " andi $1,%[addr],0x7 \n" + " beqz $1,1f \n" + " nop \n" + " ldr %[temp], 0(%[addr]) \n" + " ldl %[temp], 7(%[addr]) \n" + " b 2f \n" + " nop \n" + " 1:\t ld %[temp],0(%[addr]) \n" + " 2:\t sd %[temp], %[res] \n" + + " .set at\n" + " .set pop\n" + : [addr]"=r"(p), [temp]"=r" (temp) + : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) + : "memory" + ); + + return res; + } + + //use mips unaligned load instructions + static inline void put_native_u2(address p, u2 x) { + if((intptr_t)p & 0x1) { + p[0] = (u_char)(x); + p[1] = (u_char)(x>>8); + } else { + *(u2*)p = x; + } + } + + static inline void put_native_u4(address p, u4 x) { + // refer to sparc implementation. + // Note that sparc is big-endian, while mips is little-endian + switch ( intptr_t(p) & 3 ) { + case 0: *(u4*)p = x; + break; + + case 2: ((u2*)p)[1] = x >> 16; + ((u2*)p)[0] = x; + break; + + default: ((u1*)p)[3] = x >> 24; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[0] = x; + break; + } + } + + static inline void put_native_u8(address p, u8 x) { + // refer to sparc implementation. + // Note that sparc is big-endian, while mips is little-endian + switch ( intptr_t(p) & 7 ) { + case 0: *(u8*)p = x; + break; + + case 4: ((u4*)p)[1] = x >> 32; + ((u4*)p)[0] = x; + break; + + case 2: ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; + ((u2*)p)[0] = x; + break; + + default: ((u1*)p)[7] = x >> 56; + ((u1*)p)[6] = x >> 48; + ((u1*)p)[5] = x >> 40; + ((u1*)p)[4] = x >> 32; + ((u1*)p)[3] = x >> 24; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[0] = x; + } + } + + + // Efficient reading and writing of unaligned unsigned data in Java + // byte ordering (i.e. big-endian ordering). Byte-order reversal is + // needed since MIPS64EL CPUs use little-endian format. 
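+  // Worked example: for p[0] = 0xCA, p[1] = 0xFE, get_native_u2(p) reads the little-endian
+  // value 0xFECA, and get_Java_u2(p) byte-swaps it to 0xCAFE, the big-endian order used in
+  // class files.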
+ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } + + + // Efficient swapping of byte ordering + static inline u2 swap_u2(u2 x); // compiler-dependent implementation + static inline u4 swap_u4(u4 x); // compiler-dependent implementation + static inline u8 swap_u8(u8 x); +}; + + +// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] +#ifdef TARGET_OS_ARCH_linux_mips +# include "bytes_linux_mips.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_solaris_mips +# include "bytes_solaris_mips.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_windows_mips +# include "bytes_windows_mips.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_bsd_mips +# include "bytes_bsd_mips.inline.hpp" +#endif + + +#endif // CPU_MIPS_VM_BYTES_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp b/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp new file mode 100644 index 00000000000..f254e07abdd --- /dev/null +++ b/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. 
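+// Each define_pd_global below supplies the platform default for a flag declared as
+// platform-dependent in c2_globals.hpp; the MIPS64 branch raises the register-pressure,
+// code-cache and MaxRAM defaults relative to the 32-bit fallback values further down.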
+define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ResizeTLAB, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +#ifdef CC_INTERP +define_pd_global(bool, ProfileInterpreter, false); +#else +define_pd_global(bool, ProfileInterpreter, true); +#endif // CC_INTERP +define_pd_global(bool, TieredCompilation, false); // Disable C1 in server JIT +define_pd_global(intx, CompileThreshold, 10000); +define_pd_global(intx, BackEdgeThreshold, 100000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 3); +define_pd_global(intx, FLOATPRESSURE, 6); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +#ifdef MIPS64 +define_pd_global(intx, INTPRESSURE, 13); +define_pd_global(intx, InteriorEntryAlignment, 16); +define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, LoopUnrollLimit, 60); +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 128ULL*G); +#else +define_pd_global(intx, INTPRESSURE, 6); +define_pd_global(intx, InteriorEntryAlignment, 4); +define_pd_global(intx, NewSizeThreadIncrease, 4*K); +define_pd_global(intx, LoopUnrollLimit, 50); // Design center runs on 1.3.1 +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2304*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 32*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 4ULL*G); +#endif // MIPS64 +define_pd_global(intx, RegisterCostAreaRatio, 16000); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoScheduling, false); +define_pd_global(bool, OptoBundling, false); + +define_pd_global(intx, ReservedCodeCacheSize, 120*M); +define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on x86. + +// Heap related flags +define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/c2_init_mips.cpp b/hotspot/src/cpu/mips/vm/c2_init_mips.cpp new file mode 100644 index 00000000000..e6d5815f424 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/c2_init_mips.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" + +// processor dependent initialization for mips + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); +} diff --git a/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp b/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp new file mode 100644 index 00000000000..1836b7a9214 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2017, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp b/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp new file mode 100644 index 00000000000..8ffaaaf841e --- /dev/null +++ b/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp @@ -0,0 +1,173 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// Release the CompiledICHolder* associated with this call site is there is one. +void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + if (is_icholder_entry(call->destination())) { + NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value()); + InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data()); + } +} + +bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + return is_icholder_entry(call->destination()); +} + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { + + address mark = cbuf.insts_mark(); // get mark within main instrs section + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); + if (base == NULL) return NULL; // CodeBuffer::expand failed + // static stub relocation stores the instruction address of the call + + __ relocate(static_stub_Relocation::spec(mark), 0); + + // Code stream for loading method may be changed. + __ synci(R0, 0); + + // Rmethod contains methodOop, it should be relocated for GC + // static stub relocation also tags the methodOop in the code-stream. + __ mov_metadata(Rmethod, NULL); + // This is recognized as unresolved by relocs/nativeInst/ic code + + __ relocate(relocInfo::runtime_call_type); + + cbuf.set_insts_mark(); + address call_pc = (address)-1; + __ patchable_jump(call_pc); + __ align(16); + // Update current stubs pointer and restore code_end. + __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { + int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; + return round_to(size, 16); +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 16; +} + +void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. 
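+  // As laid out by emit_to_interp_stub() above, the stub is: one nop-sized instruction (the
+  // synci), then the NativeMovConstReg that materializes the callee Method*, then the
+  // patchable jump; the nop_instruction_size offset below skips past the synci.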
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef MIPS64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + + assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), + "a) MT-unsafe modification of inline cache"); + assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, + "b) MT-unsafe modification of inline cache"); + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef MIPS64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + method_holder->set_data(0); + jump->set_jump_destination((address)-1); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledStaticCall::verify() { + // Verify call. + NativeCall::verify(); + if (os::is_MP()) { + verify_alignment(); + } + + // Verify stub. + address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef MIPS64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT diff --git a/hotspot/src/cpu/mips/vm/copy_mips.hpp b/hotspot/src/cpu/mips/vm/copy_mips.hpp new file mode 100644 index 00000000000..4442e1dc716 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/copy_mips.hpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_MIPS_VM_COPY_MIPS_HPP
+#define CPU_MIPS_VM_COPY_MIPS_HPP
+
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+#ifdef TARGET_OS_ARCH_linux_mips
+# include "copy_linux_mips.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_solaris_mips
+# include "copy_solaris_mips.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_windows_mips
+# include "copy_windows_mips.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_bsd_mips
+# include "copy_bsd_mips.inline.hpp"
+#endif
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+
+// Template for atomic, element-wise copy.
+template <typename T>
+static void copy_conjoint_atomic(const T* from, T* to, size_t count) {
+  if (from > to) {
+    while (count-- > 0) {
+      // Copy forwards
+      *to++ = *from++;
+    }
+  } else {
+    from += count - 1;
+    to += count - 1;
+    while (count-- > 0) {
+      // Copy backwards
+      *to-- = *from--;
+    }
+  }
+}
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  julong* to = (julong*) tohw;
+  julong v = ((julong) value << 32) | value;
+  while (count-- > 0) {
+    *to++ = v;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);
+}
+
+#endif //CPU_MIPS_VM_COPY_MIPS_HPP
diff --git a/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp
new file mode 100644
index 00000000000..37bd03b00b0
--- /dev/null
+++ b/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +#ifndef CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP +#define CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP + + protected: + +#if 0 + address generate_asm_interpreter_entry(bool synchronized); + address generate_native_entry(bool synchronized); + address generate_abstract_entry(void); + address generate_math_entry(AbstractInterpreter::MethodKind kind); + address generate_empty_entry(void); + address generate_accessor_entry(void); + void lock_method(void); + void generate_stack_overflow_check(void); + + void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); + void generate_counter_overflow(Label* do_continue); +#endif + + void generate_more_monitors(); + void generate_deopt_handling(); + address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only + void generate_compute_interpreter_state(const Register state, + const Register prev_state, + const Register sender_sp, + bool native); // C++ interpreter only + +#endif // CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp new file mode 100644 index 00000000000..1f8d75d593a --- /dev/null +++ b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/cppInterpreter.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef SHARK +#include "shark/shark_globals.hpp" +#endif + +#ifdef CC_INTERP + +// Routine exists to make tracebacks look decent in debugger +// while "shadow" interpreter frames are on stack. 
It is also +// used to distinguish interpreter frames. + +extern "C" void RecursiveInterpreterActivation(interpreterState istate) { + ShouldNotReachHere(); +} + +bool CppInterpreter::contains(address pc) { + Unimplemented(); +} + +#define STATE(field_name) Lstate, in_bytes(byte_offset_of(BytecodeInterpreter, field_name)) +#define __ _masm-> + +Label frame_manager_entry; +Label fast_accessor_slow_entry_path; // fast accessor methods need to be able to jmp to unsynchronized + // c++ interpreter entry point this holds that entry point label. + +static address unctrap_frame_manager_entry = NULL; + +static address interpreter_return_address = NULL; +static address deopt_frame_manager_return_atos = NULL; +static address deopt_frame_manager_return_btos = NULL; +static address deopt_frame_manager_return_itos = NULL; +static address deopt_frame_manager_return_ltos = NULL; +static address deopt_frame_manager_return_ftos = NULL; +static address deopt_frame_manager_return_dtos = NULL; +static address deopt_frame_manager_return_vtos = NULL; + +const Register prevState = G1_scratch; + +void InterpreterGenerator::save_native_result(void) { + Unimplemented(); +} + +void InterpreterGenerator::restore_native_result(void) { + Unimplemented(); +} + +// A result handler converts/unboxes a native call result into +// a java interpreter/compiler result. The current frame is an +// interpreter frame. The activation frame unwind code must be +// consistent with that of TemplateTable::_return(...). In the +// case of native methods, the caller's SP was not modified. +address CppInterpreterGenerator::generate_result_handler_for(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreter::return_entry(TosState state, int length) { + Unimplemented(); +} + +address CppInterpreter::deopt_entry(TosState state, int length) { + Unimplemented(); +} + +void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { + Unimplemented(); +} + +address InterpreterGenerator::generate_empty_entry(void) { + Unimplemented(); +} + +address InterpreterGenerator::generate_accessor_entry(void) { + Unimplemented(); +} + +address InterpreterGenerator::generate_native_entry(bool synchronized) { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_compute_interpreter_state(const Register state, + const Register prev_state, + bool native) { + Unimplemented(); +} + +void InterpreterGenerator::lock_method(void) { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_deopt_handling() { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_more_monitors() { + Unimplemented(); +} + + +static address interpreter_frame_manager = NULL; + +void CppInterpreterGenerator::adjust_callers_stack(Register args) { + Unimplemented(); +} + +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + Unimplemented(); +} + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : CppInterpreterGenerator(code) { + Unimplemented(); +} + + +static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { + Unimplemented(); +} + +int 
AbstractInterpreter::size_top_interpreter_activation(methodOop method) { + Unimplemented(); +} + +void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill, + frame* caller, + frame* current, + methodOop method, + intptr_t* locals, + intptr_t* stack, + intptr_t* stack_base, + intptr_t* monitor_base, + intptr_t* frame_bottom, + bool is_top_frame + ) +{ + Unimplemented(); +} + +void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp) { + Unimplemented(); +} + + +int AbstractInterpreter::layout_activation(methodOop method, + int tempcount, // Number of slots on java expression stack in use + int popframe_extra_args, + int moncount, // Number of active monitors + int callee_param_size, + int callee_locals_size, + frame* caller, + frame* interpreter_frame, + bool is_top_frame) { + Unimplemented(); +} + +#endif // CC_INTERP diff --git a/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp new file mode 100644 index 00000000000..49c47330495 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP +#define CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreter to get the VM to print out the size. + // Max size with JVMTI and TaggedStackInterpreter + + // QQQ this is proably way too large for c++ interpreter + + // The sethi() instruction generates lots more instructions when shell + // stack limit is unlimited, so that's why this is much bigger. + const static int InterpreterCodeSize = 210 * K; + +#endif // CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/debug_mips.cpp b/hotspot/src/cpu/mips/vm/debug_mips.cpp new file mode 100644 index 00000000000..50de03653b1 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/debug_mips.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nmethod.hpp" +#include "runtime/frame.hpp" +#include "runtime/init.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" +#include "utilities/top.hpp" + +#ifndef PRODUCT + +void pd_ps(frame f) { + intptr_t* sp = f.sp(); + intptr_t* prev_sp = sp - 1; + intptr_t *pc = NULL; + intptr_t *next_pc = NULL; + int count = 0; + tty->print("register window backtrace from %#lx:\n", p2i(sp)); +} + +// This function is used to add platform specific info +// to the error reporting code. + +void pd_obfuscate_location(char *buf,int buflen) {} + +#endif // PRODUCT diff --git a/hotspot/src/cpu/mips/vm/depChecker_mips.cpp b/hotspot/src/cpu/mips/vm/depChecker_mips.cpp new file mode 100644 index 00000000000..756ccb68f9c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/depChecker_mips.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/disassembler.hpp" +#include "depChecker_mips.hpp" + +// Nothing to do on mips diff --git a/hotspot/src/cpu/mips/vm/depChecker_mips.hpp b/hotspot/src/cpu/mips/vm/depChecker_mips.hpp new file mode 100644 index 00000000000..11e52b4e8f8 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/depChecker_mips.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP + +// Nothing to do on MIPS + +#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/disassembler_mips.hpp b/hotspot/src/cpu/mips/vm/disassembler_mips.hpp new file mode 100644 index 00000000000..c5f3a8888dd --- /dev/null +++ b/hotspot/src/cpu/mips/vm/disassembler_mips.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return "gpr-names=64"; + } + +#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/frame_mips.cpp b/hotspot/src/cpu/mips/vm/frame_mips.cpp new file mode 100644 index 00000000000..1c928976fc3 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/frame_mips.cpp @@ -0,0 +1,711 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_mips.inline.hpp" + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support +// for Profiling - acting on another frame. walks sender frames +// if valid. +// frame profile_find_Java_sender_frame(JavaThread *thread); + +bool frame::safe_for_sender(JavaThread *thread) { + address sp = (address)_sp; + address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) + bool sp_safe = (sp < thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size); + + + if (!sp_safe) { + return false; + } + + // unextended sp must be within the stack and above or equal sp + bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && + (unextended_sp >= sp); + + if (!unextended_sp_safe) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL ) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. 
+ + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. + return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address) this->fp()[return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[link_offset]; + + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; + } + + sender_sp = _unextended_sp + _cb->frame_size(); + sender_unextended_sp = sender_sp; + // On MIPS the return_address is always the word on the stack + sender_pc = (address) *(sender_sp-1); + // Note: frame::sender_sp_offset is only valid for compiled frame + saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // FP is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP + // is really a frame pointer. 
+ + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); + + return jcw_safe; + } + + if (sender_blob->is_nmethod()) { + nmethod* nm = sender_blob->as_nmethod_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { + return false; + } + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_nmethod(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + + if (!sender_blob->is_nmethod()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + // Note: fp == NULL is not really a prerequisite for this to be safe to + // walk for c2. However we've modified the code such that if we get + // a failure with fp != NULL that we then try with FP == NULL. + // This is basically to mimic what a last_frame would look like if + // c2 had generated it. + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + + if ( (address) this->fp()[return_addr_offset] == NULL) return false; + + + // could try and do some more potential verification of native frame if we could think of some... 
+ + return true; + +} + +void frame::patch_pc(Thread* thread, address pc) { + assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + + // Either the return address is the original one or we are going to + // patch in the same address that's already there. + assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + _cb = CodeCache::find_blob(pc); + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +#ifdef CC_INTERP +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + // QQQ why does this specialize method exist if frame::sender_sp() does same thing? + // seems odd and if we always know interpreted vs. non then sender_sp() is really + // doing too much work. + return get_interpreterState()->sender_sp(); +} + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return get_interpreterState()->monitor_base(); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + return (BasicObjectLock*) get_interpreterState()->stack_base(); +} + +#else // CC_INTERP + +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the pointer points inside the frame + assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); + assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; +} +#endif // CC_INTERP + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from 
C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + if (jfa->last_Java_pc() != NULL ) { + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; + } + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); + return fr; +} + +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // sp is the raw sp from the sender after adapter or interpreter extension + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). + intptr_t* unextended_sp = interpreter_frame_sender_sp(); + + // The interpreter and compiler(s) always save FP in a known + // location on entry. We must record where that location is + // so this if FP was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves FP if we record where it is then + // we don't have to always save FP on entry and exit to c2 compiled + // code, on entry will be enough. +#ifdef COMPILER2 + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } +#endif /* COMPILER2 */ + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + + +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. The unextended SP might also be the saved SP +// for MethodHandle call sites. +#ifdef ASSERT +void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. + fr._unextended_sp = unextended_sp; + + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); + assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); +} +#endif + + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { + // On MIPS, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); + if (sender_nm != NULL) { + // If the sender PC is a deoptimization point, get the original PC. + if (sender_nm->is_deopt_entry(_pc) || + sender_nm->is_deopt_mh_entry(_pc)) { + DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp)); + } + } +} + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // The interpreter and compiler(s) always save fp in a known + // location on entry. We must record where that location is + // so that if fp was live on callout from c2 we can find + // the saved copy no matter what it called. 
+ + // Since the interpreter always saves fp if we record where it is then + // we don't have to always save fp on entry and exit to c2 compiled + // code, on entry will be enough. + map->set_location(FP->as_VMReg(), (address) link_addr); + // this is weird "H" ought to be at a higher address however the + // oopMaps seems to have the "H" regs at the same address and the + // vanilla register. + // XXXX make this go away + if (true) { + map->set_location(FP->as_VMReg()->next(), (address) link_addr); + } +} + +//------------------------------sender_for_compiled_frame----------------------- +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + + // frame owned by optimizing compiler + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + + intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = sender_sp; + +#ifdef ASSERT + const bool c1_compiled = _cb->is_compiled_by_c1(); + bool native = _cb->is_nmethod() && ((nmethod*)_cb)->is_native_method(); + if (c1_compiled && native) { + assert(sender_sp == fp() + frame::sender_sp_offset, "incorrect frame size"); + } +#endif // ASSERT + // On Intel the return_address is always the word on the stack + // the fp in compiler points to sender fp, but in interpreter, fp points to return address, + // so getting sender for compiled frame is not same as interpreter frame. + // we hard code here temporarily + // spark + address sender_pc = (address) *(sender_sp-1); + + intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of epb there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + assert(sender_sp != sp(), "must have changed"); + return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + if (is_interpreted_frame()) return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. 
+ return frame(sender_sp(), link(), sender_pc()); +} + + +bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { + assert(is_interpreted_frame(), "must be interpreter frame"); + Method* method = interpreter_frame_method(); + // When unpacking an optimized frame the frame pointer is + // adjusted with: + int diff = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + printf("^^^^^^^^^^^^^^^adjust fp in deopt fp = 0%lx \n", (intptr_t)(fp - diff)); + return _fp == (fp - diff); +} + +void frame::pd_gc_epilog() { + // nothing done here now +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { +// QQQ +#ifdef CC_INTERP +#else + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + + // first the method + + Method* m = *interpreter_frame_method_addr(); + + // validate the method we'd find in this potential sender + if (!m->is_valid_method()) return false; + + // stack frames shouldn't be much larger than max_stack elements + + //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { + if (fp() - sp() > 4096) { // stack frames shouldn't be large. + return false; + } + + // validate bci/bcx + + intptr_t bcx = interpreter_frame_bcx(); + if (m->validate_bci_from_bcx(bcx) < 0) { + return false; + } + + // validate ConstantPoolCache* + + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + + if (cp == NULL || !cp->is_metaspace_object()) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + + if (locals > thread->stack_base() || locals < (address) fp()) return false; + + // We'd have to be pretty unlucky to be mislead at this point + +#endif // CC_INTERP + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { +#ifdef CC_INTERP + // Needed for JVMTI. The result should always be in the interpreterState object + assert(false, "NYI"); + interpreterState istate = get_interpreterState(); +#endif // CC_INTERP + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr; + if (method->is_native()) { + // Prior to calling into the runtime to report the method_exit the possible + // return value is pushed to the native stack. If the result is a jfloat/jdouble + // then ST0 is saved. See the note in generate_native_result + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + tos_addr += 2; + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { +#ifdef CC_INTERP + obj = istate->_oop_temp; +#else + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); +#endif // CC_INTERP + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; + } + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdx); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcx); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} +#endif + +intptr_t *frame::initial_deoptimization_info() { + // used to reset the saved FP + return fp(); +} + +intptr_t* frame::real_fp() const { + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(! is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} +#endif diff --git a/hotspot/src/cpu/mips/vm/frame_mips.hpp b/hotspot/src/cpu/mips/vm/frame_mips.hpp new file mode 100644 index 00000000000..9e684a8dc34 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/frame_mips.hpp @@ -0,0 +1,229 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP +#define CPU_MIPS_VM_FRAME_MIPS_HPP + +#include "runtime/synchronizer.hpp" +#include "utilities/top.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, fp, sp} +// ------------------------------ Asm interpreter ---------------------------------------- +// Layout of asm interpreter frame: +// [expression stack ] * <- sp +// [monitors ] \ +// ... | monitor block size +// [monitors ] / +// [monitor block size ] +// [byte code index/pointr] = bcx() bcx_offset +// [pointer to locals ] = locals() locals_offset +// [constant pool cache ] = cache() cache_offset +// [methodData ] = mdp() mdx_offset +// [methodOop ] = method() method_offset +// [last sp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset +// [old frame pointer ] <- fp = link() +// [return pc ] +// [oop temp ] (only for native calls) +// [locals and parameters ] +// <- sender sp +// ------------------------------ Asm interpreter ---------------------------------------- + +// ------------------------------ C++ interpreter ---------------------------------------- +// +// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) +// +// <- SP (current sp) +// [local variables ] BytecodeInterpreter::run local variables +// ... BytecodeInterpreter::run local variables +// [local variables ] BytecodeInterpreter::run local variables +// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] +// [return pc ] (return to frame manager) +// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- +// [expression stack ] <- last_Java_sp | +// [... ] * <- interpreter_state.stack | +// [expression stack ] * <- interpreter_state.stack_base | +// [monitors ] \ | +// ... | monitor block size | +// [monitors ] / <- interpreter_state.monitor_base | +// [struct interpretState ] <-----------------------------------------| +// [return pc ] (return to callee of frame manager [1] +// [locals and parameters ] +// <- sender sp + +// [1] When the c++ interpreter calls a new method it returns to the frame +// manager which allocates a new frame on the stack. In that case there +// is no real callee of this newly allocated frame. The frame manager is +// aware of the additional frame(s) and will pop them as nested calls +// complete. Howevers tTo make it look good in the debugger the frame +// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation +// with a fake interpreter_state* parameter to make it easy to debug +// nested calls. + +// Note that contrary to the layout for the assembly interpreter the +// expression stack allocated for the C++ interpreter is full sized. +// However this is not as bad as it seems as the interpreter frame_manager +// will truncate the unused space on succesive method calls. +// +// ------------------------------ C++ interpreter ---------------------------------------- + +// Layout of interpreter frame: +// +// [ monitor entry ] <--- sp +// ... 
+// [ monitor entry ] +// -9 [ monitor block top ] ( the top monitor entry ) +// -8 [ byte code pointer ] (if native, bcp = 0) +// -7 [ constant pool cache ] +// -6 [ methodData ] mdx_offset(not core only) +// -5 [ mirror ] +// -4 [ methodOop ] +// -3 [ locals offset ] +// -2 [ last_sp ] +// -1 [ sender's sp ] +// 0 [ sender's fp ] <--- fp +// 1 [ return address ] +// 2 [ oop temp offset ] (only for native calls) +// 3 [ result handler offset ] (only for native calls) +// 4 [ result type info ] (only for native calls) +// [ local var m-1 ] +// ... +// [ local var 0 ] +// [ argumnet word n-1 ] <--- ( sender's sp ) +// ... +// [ argument word 0 ] <--- S7 + + public: + enum { + pc_return_offset = 0, + // All frames + link_offset = 0, + return_addr_offset = 1, + // non-interpreter frames + sender_sp_offset = 2, + +#ifndef CC_INTERP + + // Interpreter frames + interpreter_frame_return_addr_offset = 1, + interpreter_frame_result_handler_offset = 3, // for native calls only + interpreter_frame_oop_temp_offset = 2, // for native calls only + + interpreter_frame_sender_fp_offset = 0, + interpreter_frame_sender_sp_offset = -1, + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_mdx_offset = interpreter_frame_method_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mdx_offset - 1, + interpreter_frame_bcx_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcx_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + +#endif // CC_INTERP + + // Entry frames + entry_frame_call_wrapper_offset = -9, + + // Native frames + + native_frame_initial_param_offset = 2 + + }; + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and _pc: + intptr_t* _fp; // frame pointer + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. 
+ + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); + static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { + verify_deopt_original_pc(nm, unextended_sp, true); + } +#endif + + public: + // Constructors + + frame(intptr_t* sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* fp); + + void init(intptr_t* sp, intptr_t* fp, address pc); + + // accessors for the instance variables + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + + // return address of param, zero origin index. + inline address* native_param_addr(int idx) const; + + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + + // helper to update a map with callee-saved FP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + +#ifndef CC_INTERP + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* sp); +#endif // CC_INTERP + +#ifdef CC_INTERP + inline interpreterState get_interpreterState() const; +#endif // CC_INTERP + +#endif // CPU_MIPS_VM_FRAME_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp b/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp new file mode 100644 index 00000000000..60e56ac7aba --- /dev/null +++ b/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp @@ -0,0 +1,312 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP + +#include "code/codeCache.hpp" + +// Inline functions for Loongson frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = (address)(sp[-1]); + + // Here's a sticky one. This constructor can be called via AsyncGetCallTrace + // when last_Java_sp is non-null but the pc fetched is junk. If we are truly + // unlucky the junk value could be to a zombied method and we'll die on the + // find_blob call. This is also why we can have no asserts on the validity + // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler + // -> pd_last_frame should use a specialized version of pd_last_frame which could + // call a specilaized frame constructor instead of this one. + // Then we could use the assert below. However this assert is of somewhat dubious + // value. + // assert(_pc != NULL, "no pc?"); + + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && unextended_sp() == other.unextended_sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
+inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Relationals on frames based +// Return true if the frame is younger (more recent activation) than the frame represented by id +inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() < id ; } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + + + +inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } +inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: + +inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } +inline address frame::sender_pc() const { return *sender_pc_addr(); } + +// return address of param, zero origin index. +inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); } + +#ifdef CC_INTERP + +inline interpreterState frame::get_interpreterState() const { + return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize )); +} + +inline intptr_t* frame::sender_sp() const { + // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames? + if (is_interpreted_frame()) { + assert(false, "should never happen"); + return get_interpreterState()->sender_sp(); + } else { + return addr_at(sender_sp_offset); + } +} + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_locals); +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return (intptr_t*) &(get_interpreterState()->_bcp); +} + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_constants); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_method); +} + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return (intptr_t*) &(get_interpreterState()->_mdx); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + assert(is_interpreted_frame(), "wrong frame type"); + return get_interpreterState()->_stack + 1; +} + +#else // asm interpreter +inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + return (intptr_t*)addr_at(interpreter_frame_bcx_offset); +} + + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + return (intptr_t*)addr_at(interpreter_frame_mdx_offset); +} + + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const 
{ + return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(interpreter_frame_method_offset); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL ) { + return sp(); + } else { + // sp() may have been extended by an adapter + assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +#endif // CC_INTERP + +inline int frame::pd_oop_map_offset_adjustment() const { + return 0; +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + + +inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } + + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + +// Compiled frames + +inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { + return (nof_args - local_index + (local_index < nof_args ? 1: -1)); +} + +inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { + return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors); +} + +inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) { + return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1); +} + +inline bool frame::volatile_across_calls(Register reg) { + return true; +} + + + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*) map->location(V0->as_VMReg())); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*) map->location(V0->as_VMReg())) = obj; +} + +#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP diff --git a/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp b/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp new file mode 100644 index 00000000000..bd00a8d473d --- /dev/null +++ b/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +// Size of MIPS Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = (2*wordSize); + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are properly extended to 64 bits. +// If set, SharedRuntime::c_calling_convention() must adapt +// signatures accordingly. +const bool CCallingConventionRequiresIntsAsLongs = false; + +#define SUPPORTS_NATIVE_CX8 + +#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/globals_mips.hpp b/hotspot/src/cpu/mips/vm/globals_mips.hpp new file mode 100644 index 00000000000..988bc35137d --- /dev/null +++ b/hotspot/src/cpu/mips/vm/globals_mips.hpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP +#define CPU_MIPS_VM_GLOBALS_MIPS_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +#ifdef CORE +define_pd_global(bool, UseSSE, 0); +#endif /* CORE */ +define_pd_global(bool, ConvertSleepToYield, true); +define_pd_global(bool, ShareVtableStubs, true); +define_pd_global(bool, CountInterpCalls, true); + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast +define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + +// See 4827828 for this change. There is no globals_core_i486.hpp. I can't +// assign a different value for C2 without touching a number of files. Use +// #ifdef to minimize the change as it's late in Mantis. -- FIXME. 
+// c1 doesn't have this problem because the fix to 4858033 assures us +// the the vep is aligned at CodeEntryAlignment whereas c2 only aligns +// the uep and the vep doesn't get real alignment but just slops on by +// only assured that the entry instruction meets the 5 byte size requirement. +define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 4000); // MIPS generates 3x instructions than X86 + +define_pd_global(uintx, TLABSize, 0); +define_pd_global(uintx, NewSize, 1024 * K); +define_pd_global(intx, PreInflateSpin, 10); + +define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); +define_pd_global(intx, PrefetchScanIntervalInBytes, -1); +define_pd_global(intx, PrefetchFieldsAhead, -1); + +define_pd_global(intx, StackYellowPages, 2); +define_pd_global(intx, StackRedPages, 1); +define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); +define_pd_global(bool, UseMembar, true); +// GC Ergo Flags +define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, PreserveFramePointer, false); +// Only c2 cares about this at the moment +define_pd_global(intx, AllocatePrefetchStyle, 2); +define_pd_global(intx, AllocatePrefetchDistance, -1); + +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ + \ + product(bool, UseLEXT1, false, \ + "Use LoongISA general EXTensions 1") \ + \ + product(bool, UseLEXT2, false, \ + "Use LoongISA general EXTensions 2") \ + \ + product(bool, UseLEXT3, false, \ + "Use LoongISA general EXTensions 3") \ + \ + product(bool, UseCodeCacheAllocOpt, true, \ + "Allocate code cache within 32-bit memory address space") \ + \ + product(intx, UseSyncLevel, 10000, \ + "The sync level on Loongson CPUs" \ + "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ + "UseSyncLevel == 4000, 101, maybe for GS464V" \ + "UseSyncLevel == 3000, 001, maybe for GS464V" \ + "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ + "UseSyncLevel == 1000, 110, maybe for GS464") \ + \ + develop(bool, UseBoundCheckInstruction, false, \ + "Use bound check instruction") \ + \ + product(intx, SetFSFOFN, 999, \ + "Set the FS/FO/FN bits in FCSR" \ + "999 means FS/FO/FN will not be changed" \ + "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ + \ + /* assembler */ \ + product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ + "Use count leading zeros instruction") \ + \ + product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ + "Use count trailing zeros instruction") \ + \ + product(bool, UseActiveCoresMP, false, \ + "Eliminate barriers for single active cpu") + +#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp b/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp new file mode 100644 index 00000000000..96ea3453606 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
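// Side note on the ARCH_FLAGS descriptions above: adjacent C string literals simply
// concatenate, so fragments that do not end with a space (or other separator) run
// together in the flag's doc string, e.g. "...in FCSR999 means..." for SetFSFOFN and
// "...Loongson CPUsUseSyncLevel == 10000..." for UseSyncLevel. A minimal, standalone
// demonstration of the literal-concatenation rule:
#include <cassert>
#include <cstring>

int main() {
  const char* joined = "Set the FS/FO/FN bits in FCSR"
                       "999 means FS/FO/FN will not be changed";
  const char* spaced = "Set the FS/FO/FN bits in FCSR. "
                       "999 means FS/FO/FN will not be changed";
  assert(std::strstr(joined, "FCSR999") != nullptr);   // fragments run together
  assert(std::strstr(spaced, "FCSR999") == nullptr);   // a separator keeps them apart
  return 0;
}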
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_mips.hpp" +#include "oops/oop.inline.hpp" +#include "oops/oop.inline2.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +int InlineCacheBuffer::ic_stub_code_size() { + return NativeMovConstReg::instruction_size + + NativeGeneralJump::instruction_size + + 1; + // so that code_end can be set in CodeBuffer + // 64bit 15 = 6 + 8 bytes + 1 byte + // 32bit 7 = 2 + 4 bytes + 1 byte +} + + +// we use T1 as cached oop(klass) now. this is the target of virtual call, +// when reach here, the receiver in T0 +// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // note: even though the code contains an embedded oop, we do not need reloc info + // because + // (1) the oop is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear +// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); +#define __ masm-> + __ patchable_set48(T1, (long)cached_value); + + __ patchable_jump(entry_point); + __ flush(); +#undef __ +} + + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + return jump->jump_destination(); +} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // creation also verifies the object + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); + // Verifies the jump + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + void* o= (void*)move->data(); + return o; +} diff --git a/hotspot/src/cpu/mips/vm/icache_mips.cpp b/hotspot/src/cpu/mips/vm/icache_mips.cpp new file mode 100644 index 00000000000..848964b63f6 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/icache_mips.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) +{ +#define __ _masm-> + StubCodeMark mark(this, "ICache", "flush_icache_stub"); + address start = __ pc(); + + __ jr_hb(RA); + __ delayed()->ori(V0, RA2, 0); + + *flush_icache_stub = (ICache::flush_icache_stub_t)start; +#undef __ +} diff --git a/hotspot/src/cpu/mips/vm/icache_mips.hpp b/hotspot/src/cpu/mips/vm/icache_mips.hpp new file mode 100644 index 00000000000..78ee11cc733 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/icache_mips.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP +#define CPU_MIPS_VM_ICACHE_MIPS_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. 
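// Context sketch, not HotSpot code: why an icache-flush hook exists at all. After
// code bytes are written, the instruction cache must be made coherent with the data
// side before the new code runs. Outside the VM the portable GCC/Clang primitive is
// __builtin___clear_cache; HotSpot instead emits its own flush stub above (ending in
// jr.hb, a jump-register with instruction-hazard barrier). publish_code is a
// hypothetical helper used only for illustration.
#include <cstddef>
#include <cstring>

void publish_code(void* code_area, const void* new_insns, std::size_t len) {
  std::memcpy(code_area, new_insns, len);          // write the new instructions
  char* p = static_cast<char*>(code_area);
  __builtin___clear_cache(p, p + len);             // flush [p, p+len) from the icache
}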
+ +class ICache : public AbstractICache { + public: + enum { + stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes + line_size = 32, // flush instruction affects a dword + log2_line_size = 5 // log2(line_size) + }; +}; + +#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp new file mode 100644 index 00000000000..ed2d931e94c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp @@ -0,0 +1,2084 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interp_masm_mips_64.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of InterpreterMacroAssembler + +#ifdef CC_INTERP +void InterpreterMacroAssembler::get_method(Register reg) { +} +#endif // CC_INTERP + +void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { + // The runtime address of BCP may be unaligned. + // Refer to the SPARC implementation. + lbu(reg, BCP, offset+1); + lbu(tmp, BCP, offset); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); +} + +void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { + assert(reg != tmp, "need separate temp register"); + if (offset & 3) { // Offset unaligned? 
+ lbu(reg, BCP, offset+3); + lbu(tmp, BCP, offset+2); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); + lbu(tmp, BCP, offset+1); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); + lbu(tmp, BCP, offset); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); + } else { + lwu(reg, BCP, offset); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) { + // interpreter specific + // + // Note: No need to save/restore bcp & locals (r13 & r14) pointer + // since these are callee saved registers and no blocking/ + // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has + // already saved them so that it can use BCP/LVP as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). Which caused bizarre failures + // when jvm built with ASSERTs. +#ifdef ASSERT + save_bcp(); + { + Label L; + ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT,R0,L); + delayed()->nop(); + stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); + bind(L); + } +#endif + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); + // interpreter specific + // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save them here (see note above) the assert is invalid. +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway. Btw., bcp must be saved/restored since it may change + // due to GC. + assert(java_thread == noreg , "not expecting a precomputed java thread"); + save_bcp(); +#ifdef ASSERT + { + Label L; + ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT, R0, L); + delayed()->nop(); + stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); + // interpreter specific + restore_bcp(); + restore_locals(); +} + + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. If the flag has the popframe_processing bit set, it + // means that this code is called *during* popframe handling - we + // don't want to reenter. + // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. 
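// A portable sketch of what get_4_byte_integer_at_bcp above is doing: the bytecode
// pointer may be unaligned, so the 32-bit operand is assembled byte by byte instead
// of risking a word load that strict-alignment hardware would trap on. load_u4_native
// is a hypothetical helper; the memcpy fast path assumes a little-endian host,
// mirroring the MIPS64el lwu path.
#include <cstdint>
#include <cstring>

uint32_t load_u4_native(const unsigned char* p) {
  if (reinterpret_cast<uintptr_t>(p) & 3u) {               // offset unaligned?
    return  static_cast<uint32_t>(p[0])                    // lbu ... offset
         | (static_cast<uint32_t>(p[1]) << 8)              // lbu ... offset+1
         | (static_cast<uint32_t>(p[2]) << 16)             // lbu ... offset+2
         | (static_cast<uint32_t>(p[3]) << 24);            // lbu ... offset+3
  }
  uint32_t v;
  std::memcpy(&v, p, sizeof v);                            // aligned: plain 4-byte load (lwu)
  return v;
}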
+ // Not clear if any other register is available, so load AT twice + assert(AT != java_thread, "check"); + lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_pending_bit); + beq(AT, R0, L); + delayed()->nop(); + + lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_processing_bit); + bne(AT, R0, L); + delayed()->nop(); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + jr(V0); + delayed()->nop(); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + Register thread = T8; +#ifndef OPT_THREAD + get_thread(thread); +#else + move(T8, TREG); +#endif + ld_ptr(thread, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address tos_addr (thread, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + const Address oop_addr (thread, in_bytes(JvmtiThreadState::earlyret_oop_offset())); + const Address val_addr (thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + //V0, oop_addr,V1,val_addr + switch (state) { + case atos: + ld_ptr(V0, oop_addr); + st_ptr(R0, oop_addr); + verify_oop(V0, state); + break; + case ltos: + ld_ptr(V0, val_addr); // fall through + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + lw(V0, val_addr); + break; + case ftos: + lwc1(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case dtos: + ldc1(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + move(AT, (int)ilgl); + sw(AT, tos_addr); + sw(R0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + Register tmp = T9; + + assert(java_thread != AT, "check"); + assert(java_thread != tmp, "check"); + ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + beq(AT, R0, L); + delayed()->nop(); + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); + move(tmp, JvmtiThreadState::earlyret_pending); + bne(tmp, AT, L); + delayed()->nop(); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. 
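// The lw/andi/beq pairs in check_and_handle_popframe above boil down to the flag
// test below: act only when the pop-frame request is pending and not already being
// processed. The constants are illustrative stand-ins for JavaThread's condition bits.
#include <cstdint>

constexpr uint32_t kPopFramePending    = 1u << 0;   // hypothetical bit positions
constexpr uint32_t kPopFrameProcessing = 1u << 1;

bool should_handle_popframe(uint32_t popframe_condition) {
  return (popframe_condition & kPopFramePending) != 0 &&
         (popframe_condition & kPopFrameProcessing) == 0;
}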
+ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + move(A0, AT); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); + jr(V0); + delayed()->nop(); + bind(L); + } +} + + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, + int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + lbu(AT, BCP, bcp_offset); + lbu(reg, BCP, bcp_offset + 1); + ins(reg, AT, 8, 8); +} + + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + get_2_byte_integer_at_bcp(index, AT, bcp_offset); + } else if (index_size == sizeof(u4)) { + assert(EnableInvokeDynamic, "giant index used only for JSR 292"); + get_4_byte_integer_at_bcp(index, AT, bcp_offset); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + nor(index, index, R0); + sll(index, index, 0); + } else if (index_size == sizeof(u1)) { + lbu(index, BCP, bcp_offset); + } else { + ShouldNotReachHere(); + } +} + + +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, index); + get_cache_index_at_bcp(index, bcp_offset, index_size); + ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); + shl(index, 2); +} + + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. 
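// Sketch of the "~x" encoding that the nor(index, index, R0) above undoes: the
// 4-byte invokedynamic operand stores the bitwise complement of the pool-cache
// index, so decoding is a single NOT. Standalone model; the value 123 mirrors the
// assert on ConstantPool::decode_invokedynamic_index(~123) in the code above.
#include <cstdint>

constexpr uint32_t encode_indy_index(uint32_t idx) { return ~idx; }
constexpr uint32_t decode_indy_index(uint32_t enc) { return ~enc; }

static_assert(decode_indy_index(encode_indy_index(123u)) == 123u,
              "decoding the complemented index recovers the original index");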
+ dsll(AT, index, Address::times_ptr); + daddu(AT, cache, AT); + lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); + if(os::is_MP()) { + sync(); // load acquire + } + + const int shift_count = (1 + byte_no) * BitsPerByte; + assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || + (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), + "correct shift count"); + dsrl(bytecode, bytecode, shift_count); + assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); + move(AT, ConstantPoolCacheEntry::bytecode_1_mask); + andr(bytecode, bytecode, AT); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert(cache != tmp, "must use different register"); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + shl(tmp, 2 + LogBytesPerWord); + ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + // skip past the header + daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + daddu(cache, cache, tmp); +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ld(mcs, method, in_bytes(Method::method_counters_offset())); + bne(mcs, R0, has_counters); + delayed()->nop(); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ld(mcs, method, in_bytes(Method::method_counters_offset())); + beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory + delayed()->nop(); + bind(has_counters); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index) { + assert_different_registers(result, index); + // convert from field index to resolved_references() index and from + // word index to byte offset. Since this is a java object, it can be compressed + Register tmp = index; // reuse + shl(tmp, LogBytesPerHeapOop); + + get_constant_pool(result); + // load pointer for resolved_references[] objArray + ld(result, result, ConstantPool::resolved_references_offset_in_bytes()); + // JNIHandles::resolve(obj); + ld(result, result, 0); //? is needed? + // Add in the index + daddu(result, result, tmp); + load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +// Resets LVP to locals. Register sub_klass cannot be any of the above. +void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { + assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); + assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); + assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); + // Profile the not-null value's klass. + // Here T9 and T1 are used as temporary registers. + profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 + + // Do the check. 
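// Generic model of the address arithmetic in load_resolved_reference_at_index above:
// scale the element index by the heap-oop size, add the object-array header, and the
// result is the slot to load from. header_bytes and log_elem_size are stand-ins for
// arrayOopDesc::base_offset_in_bytes(T_OBJECT) and LogBytesPerHeapOop.
#include <cstddef>
#include <cstdint>

uintptr_t resolved_reference_slot(uintptr_t array_base,
                                  std::size_t header_bytes,
                                  std::size_t index,
                                  unsigned    log_elem_size) {
  return array_base + header_bytes + (index << log_elem_size);
}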
+ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 + + // Profile the failure of the check. + profile_typecheck_failed(T9); // blows T9 +} + + + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + ld(r, SP, 0); + daddiu(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + lw(r, SP, 0); + daddiu(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + ld(r, SP, 0); + daddiu(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + lwc1(r, SP, 0); + daddiu(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + ldc1(r, SP, 0); + daddiu(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + daddiu(SP, SP, - Interpreter::stackElementSize); + sd(r, SP, 0); +} + +void InterpreterMacroAssembler::push_i(Register r) { + // For compatibility reason, don't change to sw. + daddiu(SP, SP, - Interpreter::stackElementSize); + sd(r, SP, 0); +} + +void InterpreterMacroAssembler::push_l(Register r) { + daddiu(SP, SP, -2 * Interpreter::stackElementSize); + sd(r, SP, 0); + sd(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + daddiu(SP, SP, - Interpreter::stackElementSize); + swc1(r, SP, 0); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + daddiu(SP, SP, -2 * Interpreter::stackElementSize); + sdc1(r, SP, 0); + sd(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: pop_i(); break; + case ltos: pop_l(); break; + case ftos: pop_f(); break; + case dtos: pop_d(); break; + case vtos: /* nothing to do */ break; + default: ShouldNotReachHere(); + } + verify_oop(FSR, state); +} + +//FSR=V0,SSR=V1 +void InterpreterMacroAssembler::push(TosState state) { + verify_oop(FSR, state); + switch (state) { + case atos: push_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: push_f(); break; + case dtos: push_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + + + +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ld(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + sd(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { + // record last_sp + move(Rsender, SP); + sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + if (JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. +#ifndef OPT_THREAD + get_thread(temp); +#else + move(temp, TREG); +#endif + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? 
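// A simplified model (not the real interpreter stack) of the slot convention the
// push_/pop_ helpers above implement: ints and references take one stackElementSize
// slot, while longs and doubles take two, with the extra slot written as zero.
#include <cassert>
#include <cstdint>
#include <vector>

struct ToyOperandStack {
  std::vector<int64_t> slots;                                        // top of stack == back()
  void push_i(int32_t v) { slots.push_back(v); }
  void push_l(int64_t v) { slots.push_back(0); slots.push_back(v); } // two slots, filler first
  int32_t pop_i() { int32_t v = static_cast<int32_t>(slots.back()); slots.pop_back(); return v; }
  int64_t pop_l() { int64_t v = slots.back(); slots.pop_back(); slots.pop_back(); return v; }
};

int main() {
  ToyOperandStack s;
  s.push_i(7);
  s.push_l(1LL << 40);
  assert(s.pop_l() == (1LL << 40));
  assert(s.pop_i() == 7);
  return 0;
}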
+ lw(AT, temp, in_bytes(JavaThread::interp_only_mode_offset())); + beq(AT, R0, run_compiled_code); + delayed()->nop(); + ld(AT, method, in_bytes(Method::interpreter_entry_offset())); + jr(AT); + delayed()->nop(); + bind(run_compiled_code); + } + + ld(AT, method, in_bytes(Method::from_interpreted_offset())); + jr(AT); + delayed()->nop(); +} + + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. mips64 does not do this. +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { + // Nothing mips64 specific to be done here +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +// assume the next bytecode in T8. +void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop) { + if (VerifyActivationFrameSize) { + Label L; + + dsubu(T2, FP, SP); + int min_frame_size = (frame::link_offset - + frame::interpreter_frame_initial_sp_offset) * wordSize; + daddiu(T2, T2,- min_frame_size); + bgez(T2, L); + delayed()->nop(); + stop("broken stack frame"); + bind(L); + } + // FIXME: I do not know which register should pass to verify_oop + if (verifyoop) verify_oop(FSR, state); + dsll(T2, Rnext, LogBytesPerWord); + + if((long)table >= (long)Interpreter::dispatch_table(btos) && + (long)table <= (long)Interpreter::dispatch_table(vtos) + ) { + int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); + int table_offset = ((int)state - (int)itos) * table_size; + + // GP points to the starting address of Interpreter::dispatch_table(itos). + // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. + if(table_offset != 0) { + daddiu(T3, GP, table_offset); + if (UseLEXT1) { + gsldx(T3, T2, T3, 0); + } else { + daddu(T3, T2, T3); + ld(T3, T3, 0); + } + } else { + if (UseLEXT1) { + gsldx(T3, T2, GP, 0); + } else { + daddu(T3, T2, GP); + ld(T3, T3, 0); + } + } + } else { + li(T3, (long)table); + if (UseLEXT1) { + gsldx(T3, T2, T3, 0); + } else { + daddu(T3, T2, T3); + ld(T3, T3, 0); + } + } + jr(T3); + delayed()->nop(); +} + +void InterpreterMacroAssembler::dispatch_only(TosState state) { + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { + dispatch_base(state, Interpreter::normal_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { + dispatch_base(state, Interpreter::normal_table(state), false); +} + + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { + // load next bytecode (load before advancing r13 to prevent AGI) + lbu(Rnext, BCP, step); + increment(BCP, step); + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + lbu(Rnext, BCP, 0); + dispatch_base(state, table); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from syncronized blocks. +// Remove the activation from the stack. 
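// Compact model of dispatch_base above: the next bytecode indexes a per-TosState
// table of code addresses (bytecode << LogBytesPerWord in the generated code) and
// control transfers through the loaded entry. Handlers here are ordinary function
// pointers; 0x60 really is the JVM iadd opcode, the rest is illustrative.
#include <cstdio>

using Handler = void (*)();

static void do_nop()  { std::puts("nop");  }
static void do_iadd() { std::puts("iadd"); }

int main() {
  Handler dispatch_table[256] = { do_nop };   // index 0 = nop; remaining entries start null
  dispatch_table[0x60] = do_iadd;
  unsigned char next_bytecode = 0x60;         // plays the role of Rnext
  dispatch_table[next_bytecode]();            // jr T3 in the generated code
  return 0;
}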
+// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +// used registers : T1, T2, T3, T8 +// T1 : thread, method access flags +// T2 : monitor entry pointer +// T3 : method, monitor top +// T8 : unlock flag +void InterpreterMacroAssembler::remove_activation( + TosState state, + Register ret_addr, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { + // Note: Registers V0, V1 and F0, F1 may be in use for the result + // check if synchronized method + Label unlocked, unlock, no_unlock; + + // get the value of _do_not_unlock_if_synchronized into T8 +#ifndef OPT_THREAD + Register thread = T1; + get_thread(thread); +#else + Register thread = TREG; +#endif + lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + // reset the flag + sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + // get method access flags + ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); + lw(T1, T3, in_bytes(Method::access_flags_offset())); + andi(T1, T1, JVM_ACC_SYNCHRONIZED); + beq(T1, R0, unlocked); + delayed()->nop(); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. + bne(T8, R0, no_unlock); + delayed()->nop(); + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. + daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize + - (int)sizeof(BasicObjectLock)); + // address of first monitor + ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, unlock); + delayed()->nop(); + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + // I think mips do not need empty_FPU_stack + // remove possible return value from FPU-stack, otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. 
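// Control-flow sketch of the synchronized-method epilogue being generated above: if
// the method is synchronized and the do-not-unlock flag is clear, the first monitor
// entry must still hold the receiver; if it was already unlocked by an explicit
// monitorexit, an IllegalMonitorStateException is thrown or installed. The enum and
// struct names below are illustrative, not HotSpot's.
struct ToyMonitorEntry { void* obj; };        // non-null obj == still locked

enum class EpilogueAction { nothing, unlock_first_monitor, raise_illegal_monitor_state };

EpilogueAction synchronized_epilogue(bool is_synchronized,
                                     bool do_not_unlock_if_synchronized,
                                     const ToyMonitorEntry* first_monitor) {
  if (!is_synchronized || do_not_unlock_if_synchronized)
    return EpilogueAction::nothing;                          // branch to unlocked / no_unlock
  if (first_monitor->obj != nullptr)
    return EpilogueAction::unlock_first_monitor;             // normal case: unlock the receiver
  return EpilogueAction::raise_illegal_monitor_state;        // already unlocked: error path
}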
+ if (install_monitor_exception) { + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + + } + + b(unlocked); + delayed()->nop(); + } + + bind(unlock); + unlock_object(c_rarg0); + pop(state); + + // Check that for block-structured locking (i.e., that all locked + // objects has been unlocked) + bind(unlocked); + + // V0, V1: Might contain return value + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top(FP, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + + bind(restart); + // points to current entry, starting with top-most entry + ld(c_rarg0, monitor_block_top); + // points to word before bottom of monitor block + daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + b(entry); + delayed()->nop(); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception + // Unlock does not block, so don't have to worry about the frame + // We don't have to preserve c_rarg0, since we are going to + // throw an exception + + push(state); + unlock_object(c_rarg0); + pop(state); + + if (install_monitor_exception) { + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + + b(restart); + delayed()->nop(); + } + + bind(loop); + ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, exception);// check if current entry is used + delayed()->nop(); + + daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry + bind(entry); + bne(c_rarg0, T3, loop); // check if bottom reached + delayed()->nop(); // if not at bottom then check this entry + } + + bind(no_unlock); + + // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation + ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); + ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); +} + +#endif // C_INTERP + +// Lock object +// +// Args: +// c_rarg1: BasicObjectLock to be used for locking +// +// Kills: +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) +// rscratch1, rscratch2 (scratch regs) +void InterpreterMacroAssembler::lock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } else { + Label done; + + const Register swap_reg = T2; // Must use T2 for cmpxchg instruction + const Register obj_reg = T1; // Will contain the oop + + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + + BasicLock::displaced_header_offset_in_bytes(); + + Label slow_case; + + // Load object pointer into obj_reg %T1 + ld(obj_reg, lock_reg, obj_offset); + + if (UseBiasedLocking) { + // Note: we use noreg for the temporary register since it's hard + // to come up with a free register on all incoming code paths + biased_locking_enter(lock_reg, obj_reg, swap_reg, noreg, false, done, &slow_case); + } + + + // Load (object->mark() | 1) into swap_reg %T2 + ld(AT, obj_reg, 0); + ori(swap_reg, AT, 1); + + + // Save (object->mark() | 1) into BasicLock's displaced header + sd(swap_reg, lock_reg, mark_offset); + + assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); + //if (os::is_MP()) { + // lock(); + //} + cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); + + if (PrintBiasedLockingStatistics) { + Label L; + beq(AT, R0, L); + delayed()->nop(); + push(T0); + push(T1); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); + pop(T1); + pop(T0); + bind(L); + } + + bne(AT, R0, done); + delayed()->nop(); + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) SP <= mark < SP + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %T2 as the result of cmpxchg + + dsubu(swap_reg, swap_reg, SP); + move(AT, 3 - os::vm_page_size()); + andr(swap_reg, swap_reg, AT); + // Save the test result, for recursive case, the result is zero + sd(swap_reg, lock_reg, mark_offset); + if (PrintBiasedLockingStatistics) { + Label L; + bne(swap_reg, R0, L); + delayed()->nop(); + push(T0); + push(T1); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); + pop(T1); + pop(T0); + bind(L); + } + + beq(swap_reg, R0, done); + delayed()->nop(); + bind(slow_case); + // Call the runtime routine for slow case + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + + bind(done); + } +} + + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg1: BasicObjectLock for lock +// +// Kills: +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) +// rscratch1, rscratch2 (scratch regs) +// Argument: T6 : Points to BasicObjectLock structure for lock +// Argument: c_rarg0 : Points to BasicObjectLock structure for lock +// Throw an IllegalMonitorException if object is not locked by current thread +void InterpreterMacroAssembler::unlock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Label done; + + const Register swap_reg = T2; // Must use T2 for cmpxchg instruction + const Register header_reg = T3; // Will contain the old oopMark + const Register obj_reg = T1; // Will contain the oop + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock structure + // Store the BasicLock address into %T2 + daddiu(swap_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); + + // Load oop into obj_reg(%T1) + ld(obj_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes ()); + //free entry + sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } + + // Load the old header from BasicLock structure + ld(header_reg, swap_reg, BasicLock::displaced_header_offset_in_bytes()); + // zero for recursive case + beq(header_reg, R0, done); + delayed()->nop(); + + // Atomic swap back the old header + if (os::is_MP()); //lock(); + cmpxchg(header_reg, Address(obj_reg, 0), swap_reg); + + // zero for recursive case + bne(AT, R0, done); + delayed()->nop(); + + // Call the runtime routine for slow case. + sd(obj_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj + call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); + + bind(done); + + restore_bcp(); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld(mdp, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); + beq(mdp, R0, zero_continue); + delayed()->nop(); +} + + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + + // V0 and T0 will be used as two temporary registers. + push2(V0, T0); + + get_method(T0); + // Test MDO to avoid the call if it is NULL. + ld(V0, T0, in_bytes(Method::method_data_offset())); + beq(V0, R0, set_mdp); + delayed()->nop(); + + // method: T0 + // bcp: BCP --> S0 + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); + // mdi: V0 + // mdo is guaranteed to be non-zero here, we checked for it before the call. + get_method(T0); + ld(T0, T0, in_bytes(Method::method_data_offset())); + daddiu(T0, T0, in_bytes(MethodData::data_offset())); + daddu(V0, T0, V0); + bind(set_mdp); + sd(V0, FP, frame::interpreter_frame_mdx_offset * wordSize); + pop2(V0, T0); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + Register method = V0; + Register mdp = V1; + Register tmp = A0; + push(method); + push(mdp); + push(tmp); + test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue + get_method(method); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
+ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); + ld(AT, method, in_bytes(Method::const_offset())); + daddu(tmp, tmp, AT); + daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); + beq(tmp, BCP, verify_continue); + delayed()->nop(); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); + bind(verify_continue); + pop(tmp); + pop(mdp); + pop(method); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + sd(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + // Counter address + Address data(mdp_in, constant); + + increment_mdp_data_at(data, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Address data, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + Register tmp = S0; + push(tmp); + if (decrement) { + // Decrement the register. + ld(AT, data); + daddiu(tmp, AT, (int32_t) -DataLayout::counter_increment); + // If the decrement causes the counter to overflow, stay negative + Label L; + slt(AT, tmp, R0); + bne(AT, R0, L); + delayed()->nop(); + daddiu(tmp, tmp, (int32_t) DataLayout::counter_increment); + bind(L); + sd(tmp, data); + } else { + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + ld(AT, data); + // Increment the register. + daddiu(tmp, AT, DataLayout::counter_increment); + // If the increment causes the counter to overflow, pull back by 1. + slt(AT, tmp, R0); + dsubu(tmp, tmp, AT); + sd(tmp, data); + } + pop(tmp); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) { + Register tmp = S0; + push(S0); + if (decrement) { + // Decrement the register. + daddu(AT, mdp_in, reg); + assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); + ld(AT, AT, constant); + + daddiu(tmp, AT, (int32_t) -DataLayout::counter_increment); + // If the decrement causes the counter to overflow, stay negative + Label L; + slt(AT, tmp, R0); + bne(AT, R0, L); + delayed()->nop(); + daddiu(tmp, tmp, (int32_t) DataLayout::counter_increment); + bind(L); + + daddu(AT, mdp_in, reg); + sd(tmp, AT, constant); + } else { + daddu(AT, mdp_in, reg); + assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); + ld(AT, AT, constant); + + // Increment the register. + daddiu(tmp, AT, DataLayout::counter_increment); + // If the increment causes the counter to overflow, pull back by 1. + slt(AT, tmp, R0); + dsubu(tmp, tmp, AT); + + daddu(AT, mdp_in, reg); + sd(tmp, AT, constant); + } + pop(S0); +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int header_offset = in_bytes(DataLayout::header_offset()); + int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); + // Set the flag + lw(AT, Address(mdp_in, header_offset)); + if(Assembler::is_simm16(header_bits)) { + ori(AT, AT, header_bits); + } else { + push(T8); + // T8 is used as a temporary register. 
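// The slt/dsubu idiom in increment_mdp_data_at above is a branch-free saturating
// increment: bump the 64-bit counter, then subtract the 1 back if the result went
// negative, i.e. the increment overflowed the signed range. Modeled on uint64_t so
// the wrap-around itself is well defined; saturating_increment(0x7fffffffffffffff)
// returns 0x7fffffffffffffff.
#include <cstdint>

uint64_t saturating_increment(uint64_t counter) {
  uint64_t bumped = counter + 1;        // daddiu tmp, AT, DataLayout::counter_increment (== 1)
  bumped -= (bumped >> 63);             // slt AT, tmp, R0 ; dsubu tmp, tmp, AT
  return bumped;
}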
+ move(T8, header_bits); + orr(AT, AT, T8); + pop(T8); + } + sw(AT, Address(mdp_in, header_offset)); +} + + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + ld(AT, Address(mdp_in, offset)); + bne(AT, value, not_equal_continue); + delayed()->nop(); + } else { + // Put the test value into a register, so caller can use it: + ld(test_value_out, Address(mdp_in, offset)); + bne(value, test_value_out, not_equal_continue); + delayed()->nop(); + } +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); + ld(AT, mdp_in, offset_of_disp); + daddu(mdp_in, mdp_in, AT); + sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + daddu(AT, reg, mdp_in); + assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); + ld(AT, AT, offset_of_disp); + daddu(mdp_in, mdp_in, AT); + sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if(Assembler::is_simm16(constant)) { + daddiu(mdp_in, mdp_in, constant); + } else { + move(AT, constant); + daddu(mdp_in, mdp_in, AT); + } + sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + push(return_bci); // save/restore across call_VM + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + pop(return_bci); +} + + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. + // We inline increment_mdp_data_at to return bumped_count in a register + //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); + ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + push(T8); + // T8 is used as a temporary register. + daddiu(T8, bumped_count, DataLayout::counter_increment); + slt(AT, T8, R0); + dsubu(bumped_count, T8, AT); + pop(T8); + sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. 
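+ // Unlike the taken case above, the bumped value is not needed afterwards, so the generic increment helper is sufficient.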
+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + bne(receiver, R0, not_null); + delayed()->nop(); + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + beq(R0, R0, skip_receiver_profile); + delayed()->nop(); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). +// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) +// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } + return; + } + + int last_row = VirtualCallData::row_limit() - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the receiver and for null. 
+ // Take any of three different outcomes: + // 1. found receiver => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the receiver is receiver[n]. + int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); + test_mdp_data_at(mdp, recvr_offset, receiver, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the receiver from the CallData.) + + // The receiver is receiver[n]. Increment count[n]. + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); + increment_mdp_data_at(mdp, count_offset); + beq(R0, R0, done); + delayed()->nop(); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on receiver[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (is_virtual_call) { + beq(reg2, R0, found_null); + delayed()->nop(); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + beq(R0, R0, done); + delayed()->nop(); + bind(found_null); + } else { + bne(reg2, R0, done); + delayed()->nop(); + } + break; + } + // Since null is rare, make it be the branch-taken case. + beq(reg2, R0, found_null); + delayed()->nop(); + + // Put all the "Case 3" tests here. + record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); + + // Found a null. Keep searching for a matching receiver, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching receiver, but we + // observed the receiver[start_row] is NULL. + + // Fill in the receiver field and increment the count. 
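+ // Install the new receiver in row[start_row] and seed its count with DataLayout::counter_increment (i.e. 1).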
+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); + set_mdp_data_at(mdp, recvr_offset, receiver); + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); + move(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + beq(R0, R0, done); + delayed()->nop(); + } +} + +// Example state machine code for three profile rows: +// // main copy of decision tree, rooted at row[1] +// if (row[0].rec == rec) { row[0].incr(); goto done; } +// if (row[0].rec != NULL) { +// // inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[1].rec != NULL) { +// // degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// if (row[2].rec != NULL) { goto done; } // overflow +// row[2].init(rec); goto done; +// } else { +// // remember row[1] is empty +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[1].init(rec); goto done; +// } +// } else { +// // remember row[0] is empty +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[0].init(rec); goto done; +// } +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); + + bind (done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, + Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + beq(R0, R0, profile_continue); + delayed()->nop(); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. 
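+ // profile_typecheck has already advanced mdp past the VirtualCallData, so the counter is reached at a negative offset from mdp.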
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + move(reg2, in_bytes(MultiBranchData::per_case_size())); + if (UseLEXT1) { + gsdmult(index, index, reg2); + } else { + dmult(index, reg2); + mflo(index); + } + daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::narrow(Register result) { + + // Get method->_constMethod->_result_type + ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); + ld(T9, T9, in_bytes(Method::const_offset())); + lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); + + Label done, notBool, notByte, notChar; + + // common case first + addiu(AT, T9, -T_INT); + beq(AT, R0, done); + delayed()->nop(); + + // mask integer result to narrower return type. 
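+ // T_BOOLEAN: canonicalize the result by keeping only the low bit.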
+ addiu(AT, T9, -T_BOOLEAN); + bne(AT, R0, notBool); + delayed()->nop(); + andi(result, result, 0x1); + beq(R0, R0, done); + delayed()->nop(); + + bind(notBool); + addiu(AT, T9, -T_BYTE); + bne(AT, R0, notByte); + delayed()->nop(); + seb(result, result); + beq(R0, R0, done); + delayed()->nop(); + + bind(notByte); + addiu(AT, T9, -T_CHAR); + bne(AT, R0, notChar); + delayed()->nop(); + andi(result, result, 0xFFFF); + beq(R0, R0, done); + delayed()->nop(); + + bind(notChar); + seh(result, result); + + // Nothing to do for T_INT + bind(done); +} + + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { + Label update, next, none; + + verify_oop(obj); + + if (mdo_addr.index() != noreg) { + guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); + guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); + push(T0); + dsll(T0, mdo_addr.index(), mdo_addr.scale()); + daddu(T0, T0, mdo_addr.base()); + } + + bne(obj, R0, update); + delayed()->nop(); + + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::null_seen); + if (mdo_addr.index() == noreg) { + sd(AT, mdo_addr); + } else { + sd(AT, T0, mdo_addr.disp()); + } + + beq(R0, R0, next); + delayed()->nop(); + + bind(update); + load_klass(obj, obj); + + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + + assert(TypeEntries::type_klass_mask == -4, "must be"); + dextm(AT, obj, 2, 62); + beq(AT, R0, next); + delayed()->nop(); + + andi(AT, obj, TypeEntries::type_unknown); + bne(AT, R0, next); + delayed()->nop(); + + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + beq(AT, R0, none); + delayed()->nop(); + + daddiu(AT, AT, -(TypeEntries::null_seen)); + beq(AT, R0, none); + delayed()->nop(); + + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + assert(TypeEntries::type_klass_mask == -4, "must be"); + dextm(AT, obj, 2, 62); + beq(AT, R0, next); + delayed()->nop(); + + // different than before. Cannot keep accurate profile. + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::type_unknown); + if (mdo_addr.index() == noreg) { + sd(AT, mdo_addr); + } else { + sd(AT, T0, mdo_addr.disp()); + } + beq(R0, R0, next); + delayed()->nop(); + + bind(none); + // first time here. Set profile type. + if (mdo_addr.index() == noreg) { + sd(obj, mdo_addr); + } else { + sd(obj, T0, mdo_addr.disp()); + } + + bind(next); + if (mdo_addr.index() != noreg) { + pop(T0); + } +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); + li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + bne(tmp, AT, profile_continue); + delayed()->nop(); + + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + if (Assembler::is_simm16(off_to_args)) { + daddiu(mdp, mdp, off_to_args); + } else { + move(AT, off_to_args); + daddu(mdp, mdp, AT); + } + + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { + addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); + } else { + li(AT, i*TypeStackSlotEntries::per_arg_count()); + subu32(tmp, tmp, AT); + } + + li(AT, TypeStackSlotEntries::per_arg_count()); + slt(AT, tmp, AT); + bne(AT, R0, done); + delayed()->nop(); + } + ld(tmp, callee, in_bytes(Method::const_offset())); + + lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); + + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); + subu(tmp, tmp, AT); + + addiu32(tmp, tmp, -1); + + Address arg_addr = argument_address(tmp); + ld(tmp, arg_addr); + + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp, mdo_arg_addr); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + if (Assembler::is_simm16(to_add)) { + daddiu(mdp, mdp, to_add); + } else { + move(AT, to_add); + daddu(mdp, mdp, AT); + } + + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); + if (Assembler::is_simm16(-1 * tmp_arg_counts)) { + addiu32(tmp, tmp, -1 * tmp_arg_counts); + } else { + move(AT, tmp_arg_counts); + subu32(mdp, mdp, AT); + } + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. + assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + sll(tmp, tmp, exact_log2(DataLayout::cell_size)); + daddu(mdp, mdp, tmp); + } + sd(mdp, FP, frame::interpreter_frame_mdx_offset * wordSize); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, _bcp_register); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. 
We can't go back to the + // beginning of the ProfileData we intend to update to check its + // type because we're right after it and we don't know its + // length + Label do_profile; + lb(tmp, _bcp_register, 0); + daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); + beq(AT, R0, do_profile); + delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); + beq(AT, R0, do_profile); + delayed()->nop(); + + get_method(tmp); + lb(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); + li(AT, vmIntrinsics::_compiledLambdaForm); + bne(tmp, AT, profile_continue); + delayed()->nop(); + + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + daddu(tmp, ret, R0); + profile_obj_type(tmp, mdo_ret_addr); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { + guarantee(T9 == tmp1, "You are required to use T9 as the index register for MIPS!"); + + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); + bltz(tmp1, profile_continue); + delayed()->nop(); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. + // mdo start + parameters offset + array length - 1 + daddu(mdp, mdp, tmp1); + ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); + Address arg_type(mdp, tmp1, per_arg_scale, type_base); + + // load offset on the stack from the slot for this parameter + dsll(AT, tmp1, per_arg_scale); + daddu(AT, AT, mdp); + ld(tmp2, AT, off_base); + + subu(tmp2, R0, tmp2); + + // read the parameter from the local area + dsll(AT, tmp2, Interpreter::stackElementScale()); + daddu(AT, AT, _locals_register); + ld(tmp2, AT, 0); + + // profile the parameter + profile_obj_type(tmp2, arg_type); + + // go to next parameter + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + bgtz(tmp1, loop); + delayed()->nop(); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { + if (state == atos) { + MacroAssembler::verify_oop(reg); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { +} +#endif // !CC_INTERP + + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent.
+ Register tempreg = T0; +#ifndef OPT_THREAD + get_thread(T8); +#else + move(T8, TREG); +#endif + if (JvmtiExport::can_post_interpreter_events()) { + Label L; + lw(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, L); + delayed()->nop(); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + + { + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + //Rthread, + T8, + //Rmethod); + S3); + } + +} + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) { + Register tempreg = T0; +#ifndef OPT_THREAD + get_thread(T8); +#else + move(T8, TREG); +#endif + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label skip; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + // For c++ interpreter the result is always stored at a known location in the frame + // template interpreter will leave it on the top of the stack. + NOT_CC_INTERP(push(state);) + lw(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, skip); + delayed()->nop(); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(skip); + NOT_CC_INTERP(pop(state)); + } + + { + // Dtrace notification + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + NOT_CC_INTERP(push(state);) + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + //Rthread, Rmethod); + T8, S3); + NOT_CC_INTERP(pop(state)); + } +} + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where) { + assert_different_registers(scratch, AT); + + if (!preloaded) { + lw(scratch, counter_addr); + } + addiu32(scratch, scratch, increment); + sw(scratch, counter_addr); + + move(AT, mask); + andr(scratch, scratch, AT); + + if (cond == Assembler::zero) { + beq(scratch, R0, *where); + delayed()->nop(); + } else { + unimplemented(); + } +} diff --git a/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp new file mode 100644 index 00000000000..a2ebdec3adb --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP + +#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assembler with interpreter-specific macros + + +class InterpreterMacroAssembler: public MacroAssembler { +#ifndef CC_INTERP + private: + + Register _locals_register; // register that contains the pointer to the locals + Register _bcp_register; // register that contains the bcp + + protected: + // Interpreter specific version of call_VM_base + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments); + + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, bool verifyoop = true); +#endif // CC_INTERP + + public: + // narrow int return value + void narrow(Register result); + + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} + + void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); + void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); + + void load_earlyret_value(TosState state); + +#ifdef CC_INTERP + void save_bcp() { /* not needed in c++ interpreter and harmless */ } + void restore_bcp() { /* not needed in c++ interpreter and harmless */ } + + // Helpers for runtime call arguments/results + void get_method(Register reg); + +#else + + // Interpreter-specific registers + void save_bcp() { + sd(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); + } + + void restore_bcp() { + ld(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); + } + + void restore_locals() { + ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); + } + + // Helpers for runtime call arguments/results + void get_method(Register reg) { + ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); + } + + void get_const(Register reg){ + get_method(reg); + ld(reg, reg, in_bytes(Method::const_offset())); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ld(reg, reg, in_bytes(ConstMethod::constants_offset())); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ld(reg, reg, ConstantPool::cache_offset_in_bytes()); + } + + void get_cpool_and_tags(Register cpool, Register tags) { + get_constant_pool(cpool); + ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2));
+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_method_counters(Register method, Register mcs, Label& skip); + + // load cpool->resolved_references(index); + void load_resolved_reference_at_index(Register result, Register index); + + void pop_ptr( Register r = FSR); + void pop_i( Register r = FSR); + void pop_l( Register r = FSR); + void pop_f(FloatRegister r = FSF); + void pop_d(FloatRegister r = FSF); + + void push_ptr( Register r = FSR); + void push_i( Register r = FSR); + void push_l( Register r = FSR); + void push_f(FloatRegister r = FSF); + void push_d(FloatRegister r = FSF); + + void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } + + void push(Register r ) { ((MacroAssembler*)this)->push(r); } + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + + void empty_expression_stack() { + ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // NULL last_sp until next java call + sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + } + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. + //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + void dispatch_only(TosState state); + void dispatch_only_normal(TosState state); + void dispatch_only_noverify(TosState state); + void dispatch_next(TosState state, int step = 0); + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method, Register temp); + + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. This code is also used for + // exception unwinding. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. + // In earlyReturn case we only want to skip throwing an exception + // and installing an exception.
+ void remove_activation(TosState state, Register ret_addr, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); +#endif // CC_INTERP + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + +#ifndef CC_INTERP + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where); + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register scratch2, + bool receiver_can_be_null = false); + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register scratch); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register scratch2); + + // Debugging + // only if +VerifyOops && state == atos + void verify_oop(Register reg, TosState state = atos); + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + + void profile_obj_type(Register obj, const Address& mdo_addr); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); +#endif // !CC_INTERP + + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); +}; + +#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP diff --git a/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp new file mode 100644 index 00000000000..26fced492a8 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP +#define CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP + + +// Generation of Interpreter +// + friend class AbstractInterpreterGenerator; + + private: + + address generate_normal_entry(bool synchronized); + address generate_native_entry(bool synchronized); + address generate_abstract_entry(void); + address generate_math_entry(AbstractInterpreter::MethodKind kind); + address generate_empty_entry(void); + address generate_accessor_entry(void); + address generate_Reference_get_entry(); + void lock_method(void); + void generate_stack_overflow_check(void); + + void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); + void generate_counter_overflow(Label* do_continue); + +#endif // CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp b/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp new file mode 100644 index 00000000000..8dec2007c6b --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP + +#include "memory/allocation.hpp" + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + + void move(int from_offset, int to_offset); + + void box(int from_offset, int to_offset); + void pass_int(); + void pass_long(); + void pass_object(); + void pass_float(); + void pass_double(); + + public: + // Creation + SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); + } + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp b/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp new file mode 100644 index 00000000000..14b7e39af76 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.inline.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of SignatureHandlerGenerator + +void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { + __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); + __ sd(temp(), to(), to_offset * longSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { + __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); + __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); + + Label L; + __ bne(AT, R0, L); + __ delayed()->nop(); + __ move(temp(), R0); + __ bind(L); + __ sw(temp(), to(), to_offset * wordSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // generate code to handle arguments + iterate(fingerprint); + // return result handler + __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); + // return + __ jr(RA); + __ delayed()->nop(); + + __ flush(); +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); + __ sw(temp(), jni_arg.as_caller_address()); + } +} + +// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ sd(temp(), jni_arg.as_caller_address()); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + Argument jni_arg(jni_offset()); + + // the handle for a receiver will never be null + bool do_NULL_check = offset() != 0 || is_static(); + if (do_NULL_check) { + __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); + __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); + } else { + __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); + } + + if (!jni_arg.is_Register()) + __ sd(temp(), jni_arg.as_caller_address()); +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); + __ sw(temp(), jni_arg.as_caller_address()); + } +} + +// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ sd(temp(), jni_arg.as_caller_address()); + } +} + + +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +class SlowSignatureHandler + : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _reg_args; + intptr_t* _fp_identifiers; + unsigned int _num_args; + + virtual void pass_int() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_args < Argument::n_register_parameters) { + *_reg_args++ = from_obj; + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_long() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2 * Interpreter::stackElementSize; + + if (_num_args < Argument::n_register_parameters) { + *_reg_args++ = from_obj; + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_object() + { + intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + if (_num_args < Argument::n_register_parameters) { + *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + _num_args++; + } else { + *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; + } + } + + virtual void pass_float() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_args < Argument::n_float_register_parameters) { + *_reg_args++ = from_obj; + *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_double() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_args < Argument::n_float_register_parameters) { + *_reg_args++ = from_obj; + *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double + _num_args++; + } else { + *_to++ = from_obj; + } + } + + public: + SlowSignatureHandler(methodHandle method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + + // see TemplateInterpreterGenerator::generate_slow_signature_handler() + _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; + _fp_identifiers = to - 1; + *(int*) _fp_identifiers = 0; + _num_args = jni_offset(); + } +}; + + +IRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +IRT_END diff --git a/hotspot/src/cpu/mips/vm/interpreter_mips.hpp b/hotspot/src/cpu/mips/vm/interpreter_mips.hpp new file mode 100644 index 00000000000..9a21d704fa3 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interpreter_mips.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_INTERPRETER_MIPS_HPP +#define CPU_MIPS_VM_INTERPRETER_MIPS_HPP + + public: + + // Sentinel placed in the code for interpreter returns so + // that i2c adapters and osr code can recognize an interpreter + // return address and convert the return to a specialized + // block of code to handle compiedl return values and cleaning + // the fpu stack. 
+ static const int return_sentinel; + + static Address::ScaleFactor stackElementScale() { + return Address::times_8; + } + + // Offset from sp (which points to the last stack element) + static int expr_offset_in_bytes(int i) { return stackElementSize * i; } + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreterSize to get the VM to print out the size. + // Max size with JVMTI and TaggedStackInterpreter + const static int InterpreterCodeSize = 168 * 1024; +#endif // CPU_MIPS_VM_INTERPRETER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp b/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp new file mode 100644 index 00000000000..014c8127131 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + + +address AbstractInterpreterGenerator::generate_slow_signature_handler() { + address entry = __ pc(); + + // Rmethod: method + // LVP: pointer to locals + // A3: first stack arg + __ move(A3, SP); + __ daddiu(SP, SP, -10 * wordSize); + __ sd(RA, SP, 0); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::slow_signature_handler), + Rmethod, LVP, A3); + + // V0: result handler + + // Stack layout: + // ... + // 10 stack arg0 <--- old sp + // 9 float/double identifiers + // 8 register arg7 + // ... + // 2 register arg1 + // 1 aligned slot + // SP: 0 return address + + // Do FP first so we can use T3 as temp + __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers + + // A0 is for env. + // If the method is not static, A1 will be corrected in generate_native_entry. + for ( int i = 1; i < Argument::n_register_parameters; i++ ) { + Register reg = as_Register(i + A0->encoding()); + FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); + Label isfloatordouble, isdouble, next; + + __ andi(AT, T3, 1 << (i*2)); // Float or Double? + __ bne(AT, R0, isfloatordouble); + __ delayed()->nop(); + + // Do Int register here + __ ld(reg, SP, (1 + i) * wordSize); + __ b (next); + __ delayed()->nop(); + + __ bind(isfloatordouble); + __ andi(AT, T3, 1 << ((i*2)+1)); // Double? + __ bne(AT, R0, isdouble); + __ delayed()->nop(); + + // Do Float Here + __ lwc1(floatreg, SP, (1 + i) * wordSize); + __ b(next); + __ delayed()->nop(); + + // Do Double here + __ bind(isdouble); + __ ldc1(floatreg, SP, (1 + i) * wordSize); + + __ bind(next); + } + + __ ld(RA, SP, 0); + __ daddiu(SP, SP, 10 * wordSize); + __ jr(RA); + __ delayed()->nop(); + return entry; +} + + +// +// Various method entries +// + +address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + + // Rmethod: methodOop + // V0: scratch + // Rsender: sender's sp + + if (!InlineIntrinsics) return NULL; // Generate a vanilla entry + + address entry_point = __ pc(); + + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used.
+ + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: [ lo(arg) ] <-- sp + // [ hi(arg) ] + { + // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are + // java methods. Interpreter::method_kind(...) will select + // this entry point for the corresponding methods in JDK 1.3. + __ ldc1(F12, SP, 0 * wordSize); + __ ldc1(F13, SP, 1 * wordSize); + __ push2(RA, FP); + __ daddiu(FP, SP, 2 * wordSize); + + // [ fp ] <-- sp + // [ ra ] + // [ lo ] <-- fp + // [ hi ] + //FIXME, need consider this + switch (kind) { + case Interpreter::java_lang_math_sin : + __ trigfunc('s'); + break; + case Interpreter::java_lang_math_cos : + __ trigfunc('c'); + break; + case Interpreter::java_lang_math_tan : + __ trigfunc('t'); + break; + case Interpreter::java_lang_math_sqrt: + __ sqrt_d(F0, F12); + break; + case Interpreter::java_lang_math_abs: + __ abs_d(F0, F12); + break; + case Interpreter::java_lang_math_log: + // Store to stack to convert 80bit precision back to 64bits + break; + case Interpreter::java_lang_math_log10: + // Store to stack to convert 80bit precision back to 64bits + break; + case Interpreter::java_lang_math_pow: + break; + case Interpreter::java_lang_math_exp: + break; + + default : + ShouldNotReachHere(); + } + + // must maintain return value in F0:F1 + __ ld(RA, FP, (-1) * wordSize); + //FIXME + __ ld(FP, FP, (-2) * wordSize); + __ move(SP, Rsender); + __ jr(RA); + __ delayed()->nop(); + } + return entry_point; +} + + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +address InterpreterGenerator::generate_abstract_entry(void) { + + // Rmethod: methodOop + // V0: receiver (unused) + // Rsender : sender 's sp + address entry_point = __ pc(); + + // abstract method entry + // throw exception + // adjust stack to what a normal return would do + __ empty_expression_stack(); + __ restore_bcp(); + __ restore_locals(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + return entry_point; +} + + +// Empty method, generate a very fast return. + +address InterpreterGenerator::generate_empty_entry(void) { + + // Rmethod: methodOop + // V0: receiver (unused) + // Rsender: sender 's sp , must set sp to this value on return , on mips ,now use T0,as it right? + if (!UseFastEmptyMethods) return NULL; + + address entry_point = __ pc(); + + Label slow_path; + __ li(RT0, SafepointSynchronize::address_of_state()); + __ lw(AT, RT0, 0); + __ move(RT0, (SafepointSynchronize::_not_synchronized)); + __ bne(AT, RT0,slow_path); + __ delayed()->nop(); + __ move(SP, Rsender); + __ jr(RA); + __ delayed()->nop(); + __ bind(slow_path); + (void) generate_normal_entry(false); + + return entry_point; + +} + +void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { + + // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in + // the days we had adapter frames. When we deoptimize a situation where a + // compiled caller calls a compiled caller will have registers it expects + // to survive the call to the callee. If we deoptimize the callee the only + // way we can restore these registers is to have the oldest interpreter + // frame that we create restore these values. 
That is what this routine + // will accomplish. + + // At the moment we have modified c2 to not have any callee save registers + // so this problem does not exist and this routine is just a place holder. + + assert(f->is_interpreted_frame(), "must be interpreted"); +} diff --git a/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp b/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp new file mode 100644 index 00000000000..dccdf6a019c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + // fence? 
+ _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + if (_last_Java_sp != src->_last_Java_sp) + _last_Java_sp = NULL; + + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + // Always walkable + bool walkable(void) { return true; } + // Never any thing to do since we are always walkable and can find address of return addresses + void make_walkable(JavaThread* thread) { } + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + +private: + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + // Assert (last_Java_sp == NULL || fp == NULL) + void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } + +#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp b/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp new file mode 100644 index 00000000000..0f7dd9424aa --- /dev/null +++ b/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#define BUFFER_SIZE 30*wordSize + +// Instead of issuing lfence for LoadLoad barrier, we create data dependency +// between loads, which is more efficient than lfence. 
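The stubs that follow sample the safepoint counter, bail out to the slow path when the low bit is set, perform the field load, then re-read the counter and bail out again if it moved. A minimal portable model of that protocol is sketched below; it uses std::atomic acquire loads where the generated code relies on the load-to-load data dependency mentioned above, and fast_get_int/safepoint_counter are illustrative names, not HotSpot API.

  #include <atomic>
  #include <cstdint>
  #include <cstdio>

  // Minimal model (not HotSpot code) of the fast-path protocol used by the
  // generated jni_fast_GetXxxField stubs: sample the counter, give up if a
  // safepoint is in progress (low bit set), do the speculative field load,
  // then re-check that the counter has not moved.
  static std::atomic<uint32_t> safepoint_counter{0};

  static bool fast_get_int(const int32_t* field_addr, int32_t* out) {
    uint32_t before = safepoint_counter.load(std::memory_order_acquire);
    if (before & 1) return false;            // safepoint in progress -> slow path
    int32_t v = *field_addr;                 // speculative load of the field
    uint32_t after = safepoint_counter.load(std::memory_order_acquire);
    if (before != after) return false;       // a safepoint intervened -> slow path
    *out = v;
    return true;
  }

  int main() {
    int32_t field = 42, v = 0;
    if (fast_get_int(&field, &v)) std::printf("fast path: %d\n", v);
    else                          std::printf("slow path\n");
    return 0;
  }

Whenever the model returns false, the caller falls back to the ordinary jni_GetXxxField path, mirroring the slow-case jump in the generated stub.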
+ +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name = NULL; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label slow; + + // return pc RA + // jni env A0 + // obj A1 + // jfieldID A2 + + address counter_addr = SafepointSynchronize::safepoint_counter_addr(); + __ set64(AT, (long)counter_addr); + __ lw(T1, AT, 0); + + // Parameters(A0~A3) should not be modified, since they will be used in slow path + __ andi(AT, T1, 1); + __ bne(AT, R0, slow); + __ delayed()->nop(); + + __ move(T0, A1); + __ clear_jweak_tag(T0); + + __ ld(T0, T0, 0); // unbox, *obj + __ dsrl(T2, A2, 2); // offset + __ daddu(T0, T0, T2); + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + switch (type) { + case T_BOOLEAN: __ lbu (V0, T0, 0); break; + case T_BYTE: __ lb (V0, T0, 0); break; + case T_CHAR: __ lhu (V0, T0, 0); break; + case T_SHORT: __ lh (V0, T0, 0); break; + case T_INT: __ lw (V0, T0, 0); break; + case T_LONG: __ ld (V0, T0, 0); break; + case T_FLOAT: __ lwc1(F0, T0, 0); break; + case T_DOUBLE: __ ldc1(F0, T0, 0); break; + default: ShouldNotReachHere(); + } + + __ set64(AT, (long)counter_addr); + __ lw(AT, AT, 0); + __ bne(T1, AT, slow); + __ delayed()->nop(); + + __ jr(RA); + __ delayed()->nop(); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind (slow); + address slow_case_addr = NULL; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + } + __ jmp(slow_case_addr); + __ delayed()->nop(); + + __ flush (); + + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_int_field0(T_FLOAT); +} + +address 
JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_int_field0(T_DOUBLE); +} diff --git a/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp b/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp new file mode 100644 index 00000000000..dfcd47b478b --- /dev/null +++ b/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP +#define CPU_MIPS_VM_JNITYPES_MIPS_HPP + +#include "memory/allocation.hpp" +#include "oops/oop.hpp" +#include "prims/jni.h" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +private: + + // 32bit Helper routines. + static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; + *(jint *)(to ) = from[0]; } + static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } + +public: + // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] + // is 8 bytes. + // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. + // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. + // This error occurs in ReflectInvoke.java + // The parameter of DD(int) should be 4 instead of 0x550000004. + // + // See: [runtime/javaCalls.hpp] + + static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. 
But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + *(jlong*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + *(jlong*) (to + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + *(jlong*) (to + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 0 + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + *(jdouble*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + *(jdouble*) (to + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + *(jdouble*) (to + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/jni_mips.h b/hotspot/src/cpu/mips/vm/jni_mips.h new file mode 100644 index 00000000000..6714f51d5d6 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/jni_mips.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef _JAVASOFT_JNI_MD_H_ +#define _JAVASOFT_JNI_MD_H_ + +// Note: please do not change these without also changing jni_md.h in the JDK +// repository +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) + #define JNIEXPORT __attribute__((visibility("default"))) + #define JNIIMPORT __attribute__((visibility("default"))) +#else + #define JNIEXPORT + #define JNIIMPORT +#endif + +#define JNICALL + +typedef int jint; + +typedef long jlong; + +typedef signed char jbyte; + +#endif diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp new file mode 100644 index 00000000000..2b8840ae100 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp @@ -0,0 +1,4332 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of MacroAssembler + +intptr_t MacroAssembler::i[32] = {0}; +float MacroAssembler::f[32] = {0.0}; + +void MacroAssembler::print(outputStream *s) { + unsigned int k; + for(k=0; kprint_cr("i%d = 0x%.16lx", k, i[k]); + } + s->cr(); + + for(k=0; kprint_cr("f%d = %f", k, f[k]); + } + s->cr(); +} + +int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } +int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } + +void MacroAssembler::save_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ sw (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ swc1 (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + +void MacroAssembler::restore_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ lw (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ lwc1 (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + + +void MacroAssembler::pd_patch_instruction(address branch, address target) { + jint& stub_inst = *(jint*) branch; + jint *pc = (jint *)branch; + + if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) { + //b_far: + // move(AT, RA); // daddu + // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); + // nop(); + // lui(T9, 0); // to be patched + // ori(T9, 0); + // daddu(T9, T9, RA); + // move(RA, AT); + // jr(T9); + + assert(opcode(pc[3]) == lui_op + && opcode(pc[4]) == ori_op + && special(pc[5]) == daddu_op, "Not a branch label patch"); + if(!(opcode(pc[3]) == lui_op + && opcode(pc[4]) == ori_op + && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } + + int offset = target - branch; + if (!is_simm16(offset)) { + pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); + pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); + } else { + // revert to "beq + nop" + CodeBuffer cb(branch, 4 * 10); + MacroAssembler masm(&cb); +#define __ masm. 
+ __ b(target); + __ delayed()->nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + } + return; + } else if (special(pc[4]) == jr_op + && opcode(pc[4]) == special_op + && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { + //jmp_far: + // patchable_set48(T9, target); + // jr(T9); + // nop(); + + CodeBuffer cb(branch, 4 * 4); + MacroAssembler masm(&cb); + masm.patchable_set48(T9, (long)(target)); + return; + } + +#ifndef PRODUCT + if (!is_simm16((target - branch - 4) >> 2)) { + tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); + tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); + Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); + tty->print_cr("======= End of decoding ======="); + } +#endif + + stub_inst = patched_branch(target - branch, stub_inst, 0); +} + +static inline address first_cache_address() { + return CodeCache::low_bound() + sizeof(HeapBlock::Header); +} + +static inline address last_cache_address() { + return CodeCache::high_bound() - Assembler::InstructionSize; +} + +int MacroAssembler::call_size(address target, bool far, bool patchable) { + if (patchable) return 6 << Assembler::LogInstructionSize; + if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop + return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; +} + +// Can we reach target using jal/j from anywhere +// in the code cache (because code can be relocated)? +bool MacroAssembler::reachable_from_cache(address target) { + address cl = first_cache_address(); + address ch = last_cache_address(); + + return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); +} + +bool MacroAssembler::reachable_from_cache() { + if (ForceUnreachable) { + return false; + } else { + address cl = first_cache_address(); + address ch = last_cache_address(); + + return fit_in_jal(cl, ch); + } +} + +void MacroAssembler::general_jump(address target) { + if (reachable_from_cache(target)) { + j(target); + delayed()->nop(); + } else { + set64(T9, (long)target); + jr(T9); + delayed()->nop(); + } +} + +int MacroAssembler::insts_for_general_jump(address target) { + if (reachable_from_cache(target)) { + //j(target); + //nop(); + return 2; + } else { + //set64(T9, (long)target); + //jr(T9); + //nop(); + return insts_for_set64((jlong)target) + 2; + } +} + +void MacroAssembler::patchable_jump(address target) { + if (reachable_from_cache(target)) { + nop(); + nop(); + nop(); + nop(); + j(target); + delayed()->nop(); + } else { + patchable_set48(T9, (long)target); + jr(T9); + delayed()->nop(); + } +} + +int MacroAssembler::insts_for_patchable_jump(address target) { + return 6; +} + +void MacroAssembler::general_call(address target) { + if (reachable_from_cache(target)) { + jal(target); + delayed()->nop(); + } else { + set64(T9, (long)target); + jalr(T9); + delayed()->nop(); + } +} + +int MacroAssembler::insts_for_general_call(address target) { + if (reachable_from_cache(target)) { + //jal(target); + //nop(); + return 2; + } else { + //set64(T9, (long)target); + //jalr(T9); + //nop(); + return insts_for_set64((jlong)target) + 2; + } +} + +void MacroAssembler::patchable_call(address target) { + if (reachable_from_cache(target)) { + nop(); + nop(); + nop(); + nop(); + jal(target); + delayed()->nop(); + } else { + patchable_set48(T9, (long)target); + jalr(T9); + delayed()->nop(); + } +} + +int 
MacroAssembler::insts_for_patchable_call(address target) { + return 6; +} + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. + +address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type + || entry.rspec().type() == relocInfo::opt_virtual_call_type + || entry.rspec().type() == relocInfo::static_call_type + || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + + address target = entry.target(); + if (!reachable_from_cache()) { + address stub = emit_trampoline_stub(offset(), target); + if (stub == NULL) { + return NULL; // CodeCache is full + } + } + + if (cbuf) cbuf->set_insts_mark(); + relocate(entry.rspec()); + + if (reachable_from_cache()) { + nop(); + nop(); + nop(); + nop(); + jal(target); + delayed()->nop(); + } else { + // load the call target from the trampoline stub + // branch + long dest = (long)pc(); + dest += (dest & 0x8000) << 1; + lui(T9, dest >> 32); + ori(T9, T9, split_low(dest >> 16)); + dsll(T9, T9, 16); + ld(T9, T9, simm16(split_low(dest))); + jalr(T9); + delayed()->nop(); + } + return pc(); +} + +// Emit a trampoline stub for a call to a target which is too far away. +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { + // Max stub size: alignment nop, TrampolineStub. + address stub = start_a_stub(NativeInstruction::nop_instruction_size + + NativeCallTrampolineStub::instruction_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } + + // Create a trampoline stub relocation which relates this trampoline stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section. 
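The "dest += (dest & 0x8000) << 1" adjustment in trampoline_call above exists because the final 16 bits are applied as a sign-extended load offset; pre-adding 0x10000 whenever bit 15 is set cancels that sign extension. Below is a self-contained check of the arithmetic, assuming nothing beyond the lui/ori/dsll/ld split shown above (rebuild is an invented name).

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // Self-contained check (not HotSpot code) of the address split used by the
  // lui/ori/dsll/ld sequence above.  The low 16 bits end up as a sign-extended
  // load offset, so the value is pre-adjusted by 0x10000 whenever bit 15 is
  // set, exactly what "dest += (dest & 0x8000) << 1" does.
  static uint64_t rebuild(uint64_t dest) {
    uint64_t adj   = dest + ((dest & 0x8000u) << 1);   // carry fix-up
    uint64_t hi32  = adj >> 32;                        // lui part
    uint64_t mid16 = (adj >> 16) & 0xffff;             // ori part
    int64_t  lo16  = (int16_t)(dest & 0xffff);         // sign-extended ld offset
    uint64_t base  = ((hi32 << 16) | mid16) << 16;     // lui; ori; dsll 16
    return base + (uint64_t)lo16;                      // the ld offset addition
  }

  int main() {
    const uint64_t samples[] = { 0x000055651ed514ull, 0x00007fff8001ull,
                                 0x0000ffff8000ull,   0x123456789abcull };
    for (uint64_t d : samples) {
      assert(rebuild(d) == d);
      std::printf("0x%012llx reassembles correctly\n", (unsigned long long)d);
    }
    return 0;
  }

The third sample deliberately has bit 15 set with the next 16 bits all ones, so the fix-up carries all the way into the lui part; the assert confirms the round trip still holds.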
+ align(wordSize); + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); + emit_int64((int64_t)dest); + end_a_stub(); + return stub; +} + +void MacroAssembler::beq_far(Register rs, Register rt, address entry) { + u_char * cur_pc = pc(); + + // Near/Far jump + if(is_simm16((entry - pc() - 4) / 4)) { + Assembler::beq(rs, rt, offset(entry)); + } else { + Label not_jump; + bne(rs, rt, not_jump); + delayed()->nop(); + + b_far(entry); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + beq_far(rs, rt, target(L)); + } else { + u_char * cur_pc = pc(); + Label not_jump; + bne(rs, rt, not_jump); + delayed()->nop(); + + b_far(L); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, address entry) { + u_char * cur_pc = pc(); + + //Near/Far jump + if(is_simm16((entry - pc() - 4) / 4)) { + Assembler::bne(rs, rt, offset(entry)); + } else { + Label not_jump; + beq(rs, rt, not_jump); + delayed()->nop(); + + b_far(entry); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + bne_far(rs, rt, target(L)); + } else { + u_char * cur_pc = pc(); + Label not_jump; + beq(rs, rt, not_jump); + delayed()->nop(); + + b_far(L); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { + Label not_taken; + + bne(rs, rt, not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { + Label not_taken; + + beq(rs, rt, not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::bc1t_long(Label& L) { + Label not_taken; + + bc1f(not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::bc1f_long(Label& L) { + Label not_taken; + + bc1t(not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::b_far(Label& L) { + if (L.is_bound()) { + b_far(target(L)); + } else { + volatile address dest = target(L); +// +// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 +// 0x00000055651ed514: daddu at, ra, zero +// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 +// +// 0x00000055651ed51c: sll zero, zero, 0 +// 0x00000055651ed520: lui t9, 0x0 +// 0x00000055651ed524: ori t9, t9, 0x21b8 +// 0x00000055651ed528: daddu t9, t9, ra +// 0x00000055651ed52c: daddu ra, at, zero +// 0x00000055651ed530: jr t9 +// 0x00000055651ed534: sll zero, zero, 0 +// + move(AT, RA); + emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); + nop(); + lui(T9, 0); // to be patched + ori(T9, T9, 0); + daddu(T9, T9, RA); + move(RA, AT); + jr(T9); + } +} + +void MacroAssembler::b_far(address entry) { + u_char * cur_pc = pc(); + + // Near/Far jump + if(is_simm16((entry - pc() - 4) / 4)) { + b(offset(entry)); + } else { + // address must be bounded + move(AT, RA); + emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); + nop(); + li32(T9, entry - pc()); + daddu(T9, T9, RA); + move(RA, AT); + jr(T9); + } +} + +void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { + addu_long(AT, base, offset); + ld_ptr(rt, AT, 0); +} + +void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { + guarantee(AT != rt, "AT must not 
equal rt"); + addu_long(AT, base, offset); + st_ptr(rt, AT, 0); +} + +Address MacroAssembler::as_Address(AddressLiteral adr) { + return Address(adr.target(), adr.rspec()); +} + +Address MacroAssembler::as_Address(ArrayAddress adr) { + return Address::make_array(adr); +} + +// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). +void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { + Label again; + + li(tmp_reg1, counter_addr); + bind(again); + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); + ll(tmp_reg2, tmp_reg1, 0); + addiu(tmp_reg2, tmp_reg2, inc); + sc(tmp_reg2, tmp_reg1, 0); + beq(tmp_reg2, R0, again); + delayed()->nop(); +} + +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + bool need_tmp_reg = false; + if (tmp_reg == noreg) { + need_tmp_reg = true; + tmp_reg = T9; + } + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + ld_ptr(swap_reg, mark_addr); + } + + if (need_tmp_reg) { + push(tmp_reg); + } + move(tmp_reg, swap_reg); + andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); + daddiu(AT, R0, markOopDesc::biased_lock_pattern); + dsubu(AT, AT, tmp_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + + bne(AT, R0, cas_label); + delayed()->nop(); + + + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + // Note that because there is no current thread register on MIPS we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and + // simpler. + st_ptr(swap_reg, saved_mark_addr); + if (need_tmp_reg) { + push(tmp_reg); + } + if (swap_reg_contains_mark) { + null_check_offset = offset(); + } + load_prototype_header(tmp_reg, obj_reg); + xorr(tmp_reg, tmp_reg, swap_reg); + get_thread(swap_reg); + xorr(swap_reg, swap_reg, tmp_reg); + + move(AT, ~((int) markOopDesc::age_mask_in_place)); + andr(swap_reg, swap_reg, AT); + + if (PrintBiasedLockingStatistics) { + Label L; + bne(swap_reg, R0, L); + delayed()->nop(); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (need_tmp_reg) { + pop(tmp_reg); + } + beq(swap_reg, R0, done); + delayed()->nop(); + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. 
We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + + move(AT, markOopDesc::biased_lock_mask_in_place); + andr(AT, swap_reg, AT); + bne(AT, R0, try_revoke_bias); + delayed()->nop(); + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + + move(AT, markOopDesc::epoch_mask_in_place); + andr(AT,swap_reg, AT); + bne(AT, R0, try_rebias); + delayed()->nop(); + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + + ld_ptr(swap_reg, saved_mark_addr); + + move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + andr(swap_reg, swap_reg, AT); + + if (need_tmp_reg) { + push(tmp_reg); + } + get_thread(tmp_reg); + orr(tmp_reg, tmp_reg, swap_reg); + //if (os::is_MP()) { + // sync(); + //} + cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + delayed()->nop(); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + delayed()->nop(); + } + b(done); + delayed()->nop(); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. 
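The guard at the top of biased_locking_enter above reduces to a single mask-and-compare on the header word. The sketch below models only that test; the concrete constants (three low bits, pattern 0b101) follow the usual markOop layout but are assumptions of this example, not values taken from this patch.

  #include <cstdint>
  #include <cstdio>

  // Illustrative only (not HotSpot code).  The constants below follow the
  // usual markOop layout (2 lock bits plus 1 "biased" bit, pattern 0b101);
  // they are assumptions of this sketch.
  constexpr uint64_t kBiasedLockMask    = 0b111;
  constexpr uint64_t kBiasedLockPattern = 0b101;

  static bool has_bias_pattern(uint64_t mark) {
    return (mark & kBiasedLockMask) == kBiasedLockPattern;
  }

  int main() {
    const uint64_t thread_bits  = 0x7f0000001000ull;   // pretend thread pointer
    const uint64_t biased_mark  = thread_bits | kBiasedLockPattern;
    const uint64_t neutral_mark = 0x1;                 // unlocked, unbiased header
    std::printf("biased header:  %s\n", has_bias_pattern(biased_mark)  ? "bias path" : "CAS path");
    std::printf("neutral header: %s\n", has_bias_pattern(neutral_mark) ? "bias path" : "CAS path");
    return 0;
  }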
+ if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + get_thread(swap_reg); + orr(tmp_reg, tmp_reg, swap_reg); + ld_ptr(swap_reg, saved_mark_addr); + + //if (os::is_MP()) { + // sync(); + //} + cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + delayed()->nop(); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + delayed()->nop(); + } + + b(done); + delayed()->nop(); + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + ld_ptr(swap_reg, saved_mark_addr); + + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + //if (os::is_MP()) { + // lock(); + //} + cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + delayed()->nop(); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + + bind(cas_label); + return null_check_offset; +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); + daddiu(AT, R0, markOopDesc::biased_lock_pattern); + + beq(AT, temp_reg, done); + delayed()->nop(); +} + +// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf +// this method will handle the stack problem, you need not to preserve the stack space for the argument now +void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { + Label L, E; + + assert(number_of_arguments <= 4, "just check"); + + andi(AT, SP, 0xf); + beq(AT, R0, L); + delayed()->nop(); + daddiu(SP, SP, -8); + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + daddiu(SP, SP, 8); + b(E); + delayed()->nop(); + + bind(L); + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + bind(E); +} + + +void MacroAssembler::jmp(address entry) { + patchable_set48(T9, (long)entry); + jr(T9); +} + +void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::runtime_call_type: + case relocInfo::none: + jmp(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + patchable_set48(T9, (long)entry); + jr(T9); + } + break; + } +} + +void MacroAssembler::jmp_far(Label& L) { + if (L.is_bound()) { + address entry = target(L); + assert(entry != NULL, "jmp most probably wrong"); + InstructionMark im(this); + + relocate(relocInfo::internal_word_type); + patchable_set48(T9, (long)entry); + } else { + InstructionMark im(this); + L.add_patch_at(code(), locator()); + + relocate(relocInfo::internal_word_type); + patchable_set48(T9, (long)pc()); + } + + jr(T9); + delayed()->nop(); +} +void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_set48(AT, (long)obj); + sd(AT, dst); +} + +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_set48(dst, (long)obj); +} + +void MacroAssembler::call(address entry) { +// c/c++ code assume T9 is entry point, so we just always move entry to t9 +// maybe there is some more graceful method to handle this. FIXME +// For more info, see class NativeCall. 
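call_VM_leaf_base above keeps the native calling convention happy by checking SP & 0xf and dropping an extra 8 bytes before the call when the stack is only 8-byte aligned. The helper below is an illustrative restatement of that adjustment (pre_call_adjust is an invented name), assuming the 16-byte alignment requirement at calls that the 0xf mask implies.

  #include <cstdint>
  #include <cstdio>

  // Illustrative restatement (not HotSpot code) of the andi/beq/daddiu
  // sequence at the top of call_VM_leaf_base: when SP is only 8-byte aligned,
  // subtract 8 before the call and add it back afterwards.
  static uintptr_t pre_call_adjust(uintptr_t sp) {
    return (sp & 0xf) ? 8u : 0u;
  }

  int main() {
    const uintptr_t samples[] = { 0x7ffffff0u, 0x7ffffff8u };
    for (uintptr_t sp : samples) {
      uintptr_t adj = pre_call_adjust(sp);
      std::printf("sp=0x%llx -> subtract %llu, call with sp=0x%llx\n",
                  (unsigned long long)sp, (unsigned long long)adj,
                  (unsigned long long)(sp - adj));
    }
    return 0;
  }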
+ patchable_set48(T9, (long)entry); + jalr(T9); +} + +void MacroAssembler::call(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::runtime_call_type: + case relocInfo::none: + call(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + call(entry); + } + break; + } +} + +void MacroAssembler::call(address entry, RelocationHolder& rh) +{ + switch (rh.type()) { + case relocInfo::runtime_call_type: + case relocInfo::none: + call(entry); + break; + default: + { + InstructionMark im(this); + relocate(rh); + call(entry); + } + break; + } +} + +void MacroAssembler::ic_call(address entry) { + RelocationHolder rh = virtual_call_Relocation::spec(pc()); + patchable_set48(IC_Klass, (long)Universe::non_oop_word()); + assert(entry != NULL, "call most probably wrong"); + InstructionMark im(this); + trampoline_call(AddressLiteral(entry, rh)); +} + +void MacroAssembler::c2bool(Register r) { + Label L; + Assembler::beq(r, R0, L); + delayed()->nop(); + move(r, 1); + bind(L); +} + +#ifndef PRODUCT +extern "C" void findpc(intptr_t x); +#endif + +void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { + if ( ShowMessageBoxOnError ) { + JavaThreadState saved_state = JavaThread::current()->thread_state(); + JavaThread::current()->set_thread_state(_thread_in_vm); + { + // In order to get locks work, we need to fake a in_VM state + ttyLocker ttyl; + ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } + + } + ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); + } + else + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); +} + + +void MacroAssembler::stop(const char* msg) { + li(A0, (long)msg); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + brk(17); +} + +void MacroAssembler::warn(const char* msg) { + pushad(); + li(A0, (long)msg); + push(S2); + move(AT, -(StackAlignmentInBytes)); + move(S2, SP); // use S2 as a sender SP holder + andr(SP, SP, AT); // align stack as required by ABI + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + move(SP, S2); // use S2 as a sender SP holder + pop(S2); + popad(); +} + +void MacroAssembler::increment(Register reg, int imm) { + if (!imm) return; + if (is_simm16(imm)) { + daddiu(reg, reg, imm); + } else { + move(AT, imm); + daddu(reg, reg, AT); + } +} + +void MacroAssembler::decrement(Register reg, int imm) { + increment(reg, -imm); +} + + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) move(A2, arg_2); + assert(arg_2 != A1, "smashed argument"); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) 
move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + + address before_call_pc; + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + + assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); + + // set last Java frame before call + before_call_pc = (address)pc(); + set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); + + // do the call + move(A0, java_thread); + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. 
+#ifndef OPT_THREAD + get_thread(java_thread); +#else +#ifdef ASSERT + { + Label L; + get_thread(AT); + beq(java_thread, AT, L); + delayed()->nop(); + stop("MacroAssembler::call_VM_base: TREG not callee saved?"); + bind(L); + } +#endif +#endif + + // discard thread and arguments + ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // reset last Java frame + reset_last_Java_frame(java_thread, false); + + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + Label L; + ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + beq(AT, R0, L); + delayed()->nop(); + li(AT, before_call_pc); + push(AT); + jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + delayed()->nop(); + bind(L); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); + sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); + verify_oop(oop_result); + } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + + move(V0, SP); + //we also reserve space for java_thread here + move(AT, -(StackAlignmentInBytes)); + andr(SP, SP, AT); + call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); + +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + if (arg_0 != A0) move(A0, arg_0); + call_VM_leaf(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + call_VM_leaf(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); + call_VM_leaf(entry_point, 3); +} +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 0); +} + + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1) { + if (arg_1 != A0) move(A0, arg_1); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2, + Register arg_3) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { +} + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { +} + +void MacroAssembler::null_check(Register reg, int offset) { + if 
(needs_explicit_null_check(offset)) { + // provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any (non-CC) registers + // NOTE: cmpl is plenty here to provoke a segv + lw(AT, reg, 0); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +void MacroAssembler::enter() { + push2(RA, FP); + move(FP, SP); +} + +void MacroAssembler::leave() { + move(SP, FP); + pop2(RA, FP); +} + +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T1; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // we must set sp to zero to clear frame + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is possible + // that we need it only for debugging + if(clear_fp) { + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); +} + +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + // we must set sp to zero to clear frame + sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); +} + +// Write serialization page so VM thread can do a pseudo remote membar. +// We use the current thread pointer to calculate a thread specific +// offset to write to within the page. This minimizes bus traffic +// due to cache line collision. +void MacroAssembler::serialize_memory(Register thread, Register tmp) { + int mask = os::vm_page_size() - sizeof(int); + assert_different_registers(AT, tmp); + assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); + srl(AT, thread, os::get_serialize_page_shift_count()); + andi(AT, AT, mask); + li(tmp, os::get_memory_serialize_page()); + addu(tmp, tmp, AT); + sw(R0, tmp, 0); +} + +// Calls to C land +// +// When entering C land, the fp, & sp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. 
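+//
+// The usual bracketing, as in call_VM_base() above, is roughly:
+//
+//   set_last_Java_frame(thread, last_java_sp, FP, pc);  // publish the anchor
+//   call(entry_point, relocInfo::runtime_call_type);    // enter C land
+//   ...
+//   reset_last_Java_frame(thread, false);               // clear sp and pc
+//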
+void MacroAssembler::set_last_Java_frame(Register java_thread, + Register last_java_sp, + Register last_java_fp, + address last_java_pc) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // last_java_pc is optional + if (last_java_pc != NULL) { + relocate(relocInfo::internal_word_type); + patchable_set48(AT, (long)last_java_pc); + st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + } + st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc) { + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + // last_java_fp is optional + if (last_java_fp->is_valid()) { + sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); + } + + // last_java_pc is optional + if (last_java_pc != NULL) { + relocate(relocInfo::internal_word_type); + patchable_set48(AT, (long)last_java_pc); + st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + } + + sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); +} + +////////////////////////////////////////////////////////////////////////////////// +#if INCLUDE_ALL_GCS + +void MacroAssembler::g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + + assert(thread == TREG, "must be"); + + Label done; + Label runtime; + + assert(pre_val != noreg, "check this code"); + + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + assert(pre_val != V0, "check this code"); + } + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + // Is marking active? + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + lw(AT, in_progress); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + lb(AT, in_progress); + } + beq(AT, R0, done); + delayed()->nop(); + + // Do we need to load the previous value? + if (obj != noreg) { + load_heap_oop(pre_val, Address(obj, 0)); + } + + // Is the previous value null? + beq(pre_val, R0, done); + delayed()->nop(); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) 
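+ //
+ // Roughly, the fast path below is:
+ //   if (index == 0) goto runtime;        // SATB buffer full (or not allocated)
+ //   index -= wordSize;
+ //   *(buf + index) = pre_val;            // append the previous value
+ //   goto done;
+ //   runtime: SharedRuntime::g1_wb_pre(pre_val, thread);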
+ + ld(tmp, index); + beq(tmp, R0, runtime); + delayed()->nop(); + + daddiu(tmp, tmp, -1 * wordSize); + sd(tmp, index); + ld(AT, buffer); + daddu(tmp, tmp, AT); + + // Record the previous value + sd(pre_val, tmp, 0); + beq(R0, R0, done); + delayed()->nop(); + + bind(runtime); + // save the live input values + if (tosca_live) push(V0); + + if (obj != noreg && obj != V0) push(obj); + + if (pre_val != V0) push(pre_val); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then fp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + + if (expand_call) { + assert(pre_val != A1, "smashed arg"); + if (thread != A1) move(A1, thread); + if (pre_val != A0) move(A0, pre_val); + MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); + } else { + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); + } + + // save the live input values + if (pre_val != V0) + pop(pre_val); + + if (obj != noreg && obj != V0) + pop(obj); + + if(tosca_live) pop(V0); + + bind(done); +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + assert(tmp != AT, "must be"); + assert(tmp2 != AT, "must be"); + assert(thread == TREG, "must be"); + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // Does store cross heap regions? + xorr(AT, store_addr, new_val); + dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); + beq(AT, R0, done); + delayed()->nop(); + + + // crosses regions, storing NULL? + beq(new_val, R0, done); + delayed()->nop(); + + // storing region crossing non-NULL, is card already dirty? + const Register card_addr = tmp; + const Register cardtable = tmp2; + + move(card_addr, store_addr); + dsrl(card_addr, card_addr, CardTableModRefBS::card_shift); + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. + set64(cardtable, (intptr_t)ct->byte_map_base); + daddu(card_addr, card_addr, cardtable); + + lb(AT, card_addr, 0); + daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val()); + beq(AT, R0, done); + delayed()->nop(); + + sync(); + lb(AT, card_addr, 0); + daddiu(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val()); + beq(AT, R0, done); + delayed()->nop(); + + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. 
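+ //
+ // The filters above have already dropped same-region stores, NULL stores,
+ // young cards, and cards already dirty (re-read after the sync()). What is
+ // left: mark the card dirty and enqueue card_addr on the thread's dirty
+ // card queue, calling SharedRuntime::g1_wb_post if the queue is full.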
+ move(AT, (int)CardTableModRefBS::dirty_card_val()); + sb(AT, card_addr, 0); + + lw(AT, queue_index); + beq(AT, R0, runtime); + delayed()->nop(); + daddiu(AT, AT, -1 * wordSize); + sw(AT, queue_index); + ld(tmp2, buffer); + ld(AT, queue_index); + daddu(tmp2, tmp2, AT); + sd(card_addr, tmp2, 0); + beq(R0, R0, done); + delayed()->nop(); + + bind(runtime); + // save the live input values + push(store_addr); + push(new_val); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG); + pop(new_val); + pop(store_addr); + + bind(done); +} + +#endif // INCLUDE_ALL_GCS +////////////////////////////////////////////////////////////////////////////////// + + +void MacroAssembler::store_check(Register obj) { + // Does a store check for the oop in register obj. The content of + // register obj is destroyed afterwards. + store_check_part_1(obj); + store_check_part_2(obj); +} + +void MacroAssembler::store_check(Register obj, Address dst) { + store_check(obj); +} + + +// split the store check operation so that other instructions can be scheduled inbetween +void MacroAssembler::store_check_part_1(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + dsrl(obj, obj, CardTableModRefBS::card_shift); +} + +void MacroAssembler::store_check_part_2(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + set64(AT, (long)ct->byte_map_base); + daddu(AT, AT, obj); + if (UseConcMarkSweepGC) sync(); + sb(R0, AT, 0); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. +void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, + Register t1, Register t2, Label& slow_case) { + assert_different_registers(obj, var_size_in_bytes, t1, t2, AT); + + Register end = t2; +#ifndef OPT_THREAD + Register thread = t1; + get_thread(thread); +#else + Register thread = TREG; +#endif + verify_tlab(t1, t2);//blows t1&t2 + + ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset())); + + if (var_size_in_bytes == NOREG) { + set64(AT, con_size_in_bytes); + addu(end, obj, AT); + } else { + addu(end, obj, var_size_in_bytes); + } + + ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset())); + sltu(AT, AT, end); + bne_far(AT, R0, slow_case); + delayed()->nop(); + + + // update the tlab top pointer + st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset())); + + verify_tlab(t1, t2); +} + +// Defines obj, preserves var_size_in_bytes +void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, + Register t1, Register t2, Label& slow_case) { + assert_different_registers(obj, var_size_in_bytes, t1, AT); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. 
+ b_far(slow_case); + delayed()->nop(); + } else { + + Address heap_top(t1); + li(t1, (long)Universe::heap()->top_addr()); + ld_ptr(obj, heap_top); + + Register end = t2; + Label retry; + + bind(retry); + if (var_size_in_bytes == NOREG) { + set64(AT, con_size_in_bytes); + addu(end, obj, AT); + } else { + addu(end, obj, var_size_in_bytes); + } + // if end < obj then we wrapped around => object too long => slow case + sltu(AT, end, obj); + bne_far(AT, R0, slow_case); + delayed()->nop(); + + li(AT, (long)Universe::heap()->end_addr()); + ld_ptr(AT, AT, 0); + sltu(AT, AT, end); + bne_far(AT, R0, slow_case); + delayed()->nop(); + // Compare obj with the top addr, and if still equal, store the new top addr in + // end at the address of the top addr pointer. Sets ZF if was equal, and clears + // it otherwise. Use lock prefix for atomicity on MPs. + //if (os::is_MP()) { + // sync(); + //} + + // if someone beat us on the allocation, try again, otherwise continue + cmpxchg(end, heap_top, obj); + beq_far(AT, R0, retry); + delayed()->nop(); + } +} + +// C2 doesn't invoke this one. +void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) { + Register top = T0; + Register t1 = T1; + Register t2 = T9; + Register t3 = T3; + Register thread_reg = T8; + assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ T2, A4); + Label do_refill, discard_tlab; + + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. + b(slow_case); + delayed()->nop(); + } + + get_thread(thread_reg); + + ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); + ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset())); + + // calculate amount of free space + subu(t1, t1, top); + shr(t1, LogHeapWordSize); + + // Retain tlab and allocate object in shared space if + // the amount free in the tlab is too large to discard. 
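+ //
+ // In outline (matching the code below):
+ //   if (free > tlab_refill_waste_limit) {   // keep this TLAB
+ //     tlab_refill_waste_limit += ThreadLocalAllocBuffer::refill_waste_limit_increment();
+ //     goto try_eden;                        // allocate this object in eden
+ //   } else {                                // discard this TLAB
+ //     fill the unused part with a dummy int[] so the heap stays parseable,
+ //     then grab a fresh TLAB from eden (do_refill).
+ //   }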
+ ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); + slt(AT, t2, t1); + beq(AT, R0, discard_tlab); + delayed()->nop(); + + // Retain + li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment()); + addu(t2, t2, AT); + st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); + + if (TLABStats) { + // increment number of slow_allocations + lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); + addiu(AT, AT, 1); + sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); + } + b(try_eden); + delayed()->nop(); + + bind(discard_tlab); + if (TLABStats) { + // increment number of refills + lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); + addiu(AT, AT, 1); + sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); + // accumulate wastage -- t1 is amount free in tlab + lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); + addu(AT, AT, t1); + sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); + } + + // if tlab is currently allocated (top or end != null) then + // fill [top, end + alignment_reserve) with array object + beq(top, R0, do_refill); + delayed()->nop(); + + // set up the mark word + li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2)); + st_ptr(AT, top, oopDesc::mark_offset_in_bytes()); + + // set the length to the remaining space + addiu(t1, t1, - typeArrayOopDesc::header_size(T_INT)); + addiu(t1, t1, ThreadLocalAllocBuffer::alignment_reserve()); + shl(t1, log2_intptr(HeapWordSize/sizeof(jint))); + sw(t1, top, arrayOopDesc::length_offset_in_bytes()); + + // set klass to intArrayKlass + li(AT, (intptr_t)Universe::intArrayKlassObj_addr()); + ld_ptr(t1, AT, 0); + //st_ptr(t1, top, oopDesc::klass_offset_in_bytes()); + store_klass(top, t1); + + ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_start_offset())); + subu(t1, top, t1); + incr_allocated_bytes(thread_reg, t1, 0); + + // refill the tlab with an eden allocation + bind(do_refill); + ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset())); + shl(t1, LogHeapWordSize); + // add object_size ?? + eden_allocate(top, t1, 0, t2, t3, slow_case); + + // Check that t1 was preserved in eden_allocate. 
+#ifdef ASSERT
+ if (UseTLAB) {
+ Label ok;
+ assert_different_registers(thread_reg, t1);
+ ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
+ shl(AT, LogHeapWordSize);
+ beq(AT, t1, ok);
+ delayed()->nop();
+ stop("assert(t1 != tlab size)");
+ should_not_reach_here();
+
+ bind(ok);
+ }
+#endif
+ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
+ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
+ addu(top, top, t1);
+ addiu(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
+ verify_tlab(t1, t2);
+ b(retry);
+ delayed()->nop();
+}
+
+void MacroAssembler::incr_allocated_bytes(Register thread,
+ Register var_size_in_bytes,
+ int con_size_in_bytes,
+ Register t1) {
+ if (!thread->is_valid()) {
+#ifndef OPT_THREAD
+ assert(t1->is_valid(), "need temp reg");
+ thread = t1;
+ get_thread(thread);
+#else
+ thread = TREG;
+#endif
+ }
+
+ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
+ if (var_size_in_bytes->is_valid()) {
+ addu(AT, AT, var_size_in_bytes);
+ } else {
+ addiu(AT, AT, con_size_in_bytes);
+ }
+ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
+}
+
+static const double pi_4 = 0.7853981633974483;
+
+// must get the argument (a double) in F12/F13
+//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
+// We need to preserve the registers which may be modified during the call
+void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
+ // save all modified registers here
+ // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP, RA, so we only save V0, V1, T9
+ pushad();
+ // we should reserve stack space before the call
+ addiu(SP, SP, -wordSize * 2);
+ switch (trig){
+ case 's' :
+ call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
+ delayed()->nop();
+ break;
+ case 'c':
+ call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
+ delayed()->nop();
+ break;
+ case 't':
+ call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
+ delayed()->nop();
+ break;
+ default: assert(false, "bad intrinsic");
+ break;
+
+ }
+
+ addiu(SP, SP, wordSize * 2);
+ popad();
+}
+
+void MacroAssembler::li(Register rd, long imm) {
+ if (imm <= max_jint && imm >= min_jint) {
+ li32(rd, (int)imm);
+ } else if (julong(imm) <= 0xFFFFFFFF) {
+ assert_not_delayed();
+ // lui sign-extends, so we can't use that.
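+ // lui would replicate bit 31 into the upper 32 bits, so a value with bit 31
+ // set but a zero high word is built instead as:
+ //   ori rd, R0, imm[31:16];  dsll rd, rd, 16;  ori rd, rd, imm[15:0]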
+ ori(rd, R0, julong(imm) >> 16); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm)); + } else if ((imm > 0) && is_simm16(imm >> 32)) { + // A 48-bit address + li48(rd, imm); + } else { + li64(rd, imm); + } +} + +void MacroAssembler::li32(Register reg, int imm) { + if (is_simm16(imm)) { + addiu(reg, R0, imm); + } else { + lui(reg, split_low(imm >> 16)); + if (split_low(imm)) + ori(reg, reg, split_low(imm)); + } +} + +void MacroAssembler::set64(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + } else { + lui(d, split_low(value >> 16)); + if (split_low(value)) { + ori(d, d, split_low(value)); + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + ori(d, R0, julong(value) >> 16); + dsll(d, d, 16); + if (split_low(value)) { + ori(d, d, split_low(value)); + } + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 + // 4 insts + li48(d, value); + } else { // li64 + // 6 insts + li64(d, value); + } +} + + +int MacroAssembler::insts_for_set64(jlong value) { + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + //daddiu(d, R0, value); + count++; + } else { + //lui(d, split_low(value >> 16)); + count++; + if (split_low(value)) { + //ori(d, d, split_low(value)); + count++; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + //ori(d, R0, julong(value) >> 16); + //dsll(d, d, 16); + count += 2; + if (split_low(value)) { + //ori(d, d, split_low(value)); + count++; + } + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 + // 4 insts + //li48(d, value); + count += 4; + } else { // li64 + // 6 insts + //li64(d, value); + count += 6; + } + + return count; +} + +void MacroAssembler::patchable_set48(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + count += 1; + } else { + lui(d, split_low(value >> 16)); + count += 1; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + ori(d, R0, julong(value) >> 16); + dsll(d, d, 16); + count += 2; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 + // 4 insts + li48(d, value); + count += 4; + } else { // li64 + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + nop(); + count++; + } +} + +void MacroAssembler::patchable_set32(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + count += 1; + } else { + lui(d, split_low(value >> 16)); + count += 1; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + ori(d, R0, julong(value) >> 16); + dsll(d, d, 16); + count += 2; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 3) { + nop(); + count++; + } +} + +void 
MacroAssembler::patchable_call32(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + count += 1; + } else { + lui(d, split_low(value >> 16)); + count += 1; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 2) { + nop(); + count++; + } +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert(UseCompressedClassPointers, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + long narrowKlass = (long)Klass::encode_klass(k); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_set48(dst, narrowKlass); +} + + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert(UseCompressedOops, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_set48(dst, oop_index); +} + +void MacroAssembler::li64(Register rd, long imm) { + assert_not_delayed(); + lui(rd, split_low(imm >> 48)); + ori(rd, rd, split_low(imm >> 32)); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm >> 16)); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm)); +} + +void MacroAssembler::li48(Register rd, long imm) { + assert_not_delayed(); + assert(is_simm16(imm >> 32), "Not a 48-bit address"); + lui(rd, imm >> 32); + ori(rd, rd, split_low(imm >> 16)); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm)); +} + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) return; + const char * b = NULL; + stringStream ss; + ss.print("verify_oop: %s: %s", reg->name(), s); + b = code_string(ss.as_string()); + pushad(); + move(A1, reg); + li(A0, (long)b); + li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); + ld(T9, AT, 0); + jalr(T9); + delayed()->nop(); + popad(); +} + + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + if (!VerifyOops) { + nop(); + return; + } + // Pass register number to verify_oop_subroutine + const char * b = NULL; + stringStream ss; + ss.print("verify_oop_addr: %s", s); + b = code_string(ss.as_string()); + + addiu(SP, SP, - 7 * wordSize); + st_ptr(T0, SP, 6 * wordSize); + st_ptr(T1, SP, 5 * wordSize); + st_ptr(RA, SP, 4 * wordSize); + st_ptr(A0, SP, 3 * wordSize); + st_ptr(A1, SP, 2 * wordSize); + st_ptr(AT, SP, 1 * wordSize); + st_ptr(T9, SP, 0); + + // addr may contain sp so we will have to adjust it based on the + // pushes that we just did. 
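+ // Seven words were pushed just above, so an SP-relative operand has moved
+ // by 7 * wordSize: what was at 0(SP) before the pushes is now at
+ // (7 * wordSize)(SP). The lea() + ld_ptr() below apply that adjustment.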
+ if (addr.uses(SP)) { + lea(A1, addr); + ld_ptr(A1, Address(A1, 7 * wordSize)); + } else { + ld_ptr(A1, addr); + } + li(A0, (long)b); + // call indirectly to solve generation ordering problem + li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); + ld_ptr(T9, AT, 0); + jalr(T9); + delayed()->nop(); + ld_ptr(T0, SP, 6* wordSize); + ld_ptr(T1, SP, 5* wordSize); + ld_ptr(RA, SP, 4* wordSize); + ld_ptr(A0, SP, 3* wordSize); + ld_ptr(A1, SP, 2* wordSize); + ld_ptr(AT, SP, 1* wordSize); + ld_ptr(T9, SP, 0* wordSize); + addiu(SP, SP, 7 * wordSize); +} + +// used registers : T0, T1 +void MacroAssembler::verify_oop_subroutine() { + // RA: ra + // A0: char* error message + // A1: oop object to verify + + Label exit, error; + // increment counter + li(T0, (long)StubRoutines::verify_oop_count_addr()); + lw(AT, T0, 0); + daddiu(AT, AT, 1); + sw(AT, T0, 0); + + // make sure object is 'reasonable' + beq(A1, R0, exit); // if obj is NULL it is ok + delayed()->nop(); + + // Check if the oop is in the right area of memory + // const int oop_mask = Universe::verify_oop_mask(); + // const int oop_bits = Universe::verify_oop_bits(); + const uintptr_t oop_mask = Universe::verify_oop_mask(); + const uintptr_t oop_bits = Universe::verify_oop_bits(); + li(AT, oop_mask); + andr(T0, A1, AT); + li(AT, oop_bits); + bne(T0, AT, error); + delayed()->nop(); + + // make sure klass is 'reasonable' + // add for compressedoops + reinit_heapbase(); + // add for compressedoops + load_klass(T0, A1); + beq(T0, R0, error); // if klass is NULL it is broken + delayed()->nop(); + // return if everything seems ok + bind(exit); + + jr(RA); + delayed()->nop(); + + // handle errors + bind(error); + pushad(); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + popad(); + jr(RA); + delayed()->nop(); +} + +void MacroAssembler::verify_tlab(Register t1, Register t2) { +#ifdef ASSERT + assert_different_registers(t1, t2, AT); + if (UseTLAB && VerifyOops) { + Label next, ok; + + get_thread(t1); + + ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); + sltu(AT, t2, AT); + beq(AT, R0, next); + delayed()->nop(); + + stop("assert(top >= start)"); + + bind(next); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); + sltu(AT, AT, t2); + beq(AT, R0, ok); + delayed()->nop(); + + stop("assert(top <= end)"); + + bind(ok); + + } +#endif +} + +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterOrConstant(value + offset); + AddressLiteral a(delayed_value_addr); + // load indirectly to solve generation ordering problem + //movptr(tmp, ExternalAddress((address) delayed_value_addr)); + //ld(tmp, a); + if (offset != 0) + daddiu(tmp,tmp, offset); + + return RegisterOrConstant(tmp); +} + +void MacroAssembler::hswap(Register reg) { + //short + //andi(reg, reg, 0xffff); + srl(AT, reg, 8); + sll(reg, reg, 24); + sra(reg, reg, 16); + orr(reg, reg, AT); +} + +void MacroAssembler::huswap(Register reg) { + dsrl(AT, reg, 8); + dsll(reg, reg, 24); + dsrl(reg, reg, 16); + orr(reg, reg, AT); + andi(reg, reg, 0xffff); +} + +// something funny to do this will only one more register AT +// 32 bits +void MacroAssembler::swap(Register reg) { + srl(AT, reg, 8); + sll(reg, reg, 24); + orr(reg, reg, AT); + //reg : 4 1 2 3 + srl(AT, AT, 16); + xorr(AT, AT, reg); + andi(AT, AT, 0xff); + //AT : 0 0 0 
1^3); + xorr(reg, reg, AT); + //reg : 4 1 2 1 + sll(AT, AT, 16); + xorr(reg, reg, AT); + //reg : 4 3 2 1 +} + +// do 32-bit CAS using MIPS64 lld/scd +// +// cas_int should only compare 32-bits of the memory value. +// However, lld/scd will do 64-bit operation, which violates the intention of cas_int. +// To simulate a 32-bit atomic operation, the value loaded with LLD should be split into +// tow halves, and only the low-32 bits is compared. If equals, the low-32 bits of newval, +// plus the high-32 bits or memory value, are stored togethor with SCD. +// +//Example: +// +// double d = 3.1415926; +// System.err.println("hello" + d); +// +// sun.misc.FloatingDecimal$1.() +// | +// `- java.util.concurrent.atomic.AtomicInteger::compareAndSet() +// +// 38 cas_int [a7a7|J] [a0|I] [a6|I] +// a0: 0xffffffffe8ea9f63 pc: 0x55647f3354 +// a6: 0x4ab325aa +// +//again: +// 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63" +// +// 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended) +// 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits +// 0x00000055647f3c68: dsll32 t8, t8, 0 +// 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal +// 0x00000055647f3c70: sll zero, zero, 0 +// +// 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended) +// 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF; +// 0x00000055647f3c7c: ori v1, v1, 0xffffffff +// 0x00000055647f3c80: and v1, a6, v1 +// 0x00000055647f3c84: or at, t8, v1 +// 0x00000055647f3c88: scd at, 0x0(a7) +// 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again +// 0x00000055647f3c90: sll zero, zero, 0 +// 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done +// 0x00000055647f3c98: sll zero, zero, 0 +//nequal: +// 0x00000055647f45a4: daddu a0, t9, zero +// 0x00000055647f45a8: daddu at, zero, zero +//done: +// + +void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) { + // MIPS64 can use ll/sc for 32-bit atomic memory access + Label done, again, nequal; + + bind(again); + + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); + ll(AT, dest); + bne(AT, c_reg, nequal); + delayed()->nop(); + + move(AT, x_reg); + sc(AT, dest); + beq(AT, R0, again); + delayed()->nop(); + b(done); + delayed()->nop(); + + // not xchged + bind(nequal); + sync(); + move(c_reg, AT); + move(AT, R0); + + bind(done); +} + +void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) { + Label done, again, nequal; + + bind(again); + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); + lld(AT, dest); + bne(AT, c_reg, nequal); + delayed()->nop(); + + move(AT, x_reg); + scd(AT, dest); + beq(AT, R0, again); + delayed()->nop(); + b(done); + delayed()->nop(); + + // not xchged + bind(nequal); + sync(); + move(c_reg, AT); + move(AT, R0); + + bind(done); +} + +void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { + Label done, again, nequal; + + Register x_reg = x_regLo; + dsll32(x_regHi, x_regHi, 0); + dsll32(x_regLo, x_regLo, 0); + dsrl32(x_regLo, x_regLo, 0); + orr(x_reg, x_regLo, x_regHi); + + Register c_reg = c_regLo; + dsll32(c_regHi, c_regHi, 0); + dsll32(c_regLo, c_regLo, 0); + dsrl32(c_regLo, c_regLo, 0); + orr(c_reg, c_regLo, c_regHi); + + bind(again); + + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); + lld(AT, dest); + bne(AT, c_reg, nequal); + delayed()->nop(); + + 
//move(AT, x_reg); + daddu(AT, x_reg, R0); + scd(AT, dest); + beq(AT, R0, again); + delayed()->nop(); + b(done); + delayed()->nop(); + + // not xchged + bind(nequal); + sync(); + //move(c_reg, AT); + //move(AT, R0); + daddu(c_reg, AT, R0); + daddu(AT, R0, R0); + bind(done); +} + +// be sure the three register is different +void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + assert_different_registers(tmp, fs, ft); + div_s(tmp, fs, ft); + trunc_l_s(tmp, tmp); + cvt_s_l(tmp, tmp); + mul_s(tmp, tmp, ft); + sub_s(fd, fs, tmp); +} + +// be sure the three register is different +void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + assert_different_registers(tmp, fs, ft); + div_d(tmp, fs, ft); + trunc_l_d(tmp, tmp); + cvt_d_l(tmp, tmp); + mul_d(tmp, tmp, ft); + sub_d(fd, fs, tmp); +} + +// Fast_Lock and Fast_Unlock used by C2 + +// Because the transitions from emitted code to the runtime +// monitorenter/exit helper stubs are so slow it's critical that +// we inline both the stack-locking fast-path and the inflated fast path. +// +// See also: cmpFastLock and cmpFastUnlock. +// +// What follows is a specialized inline transliteration of the code +// in slow_enter() and slow_exit(). If we're concerned about I$ bloat +// another option would be to emit TrySlowEnter and TrySlowExit methods +// at startup-time. These methods would accept arguments as +// (Obj, Self, box, Scratch) and return success-failure +// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply +// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. +// In practice, however, the # of lock sites is bounded and is usually small. +// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer +// if the processor uses simple bimodal branch predictors keyed by EIP +// Since the helper routines would be called from multiple synchronization +// sites. +// +// An even better approach would be write "MonitorEnter()" and "MonitorExit()" +// in java - using j.u.c and unsafe - and just bind the lock and unlock sites +// to those specialized methods. That'd give us a mostly platform-independent +// implementation that the JITs could optimize and inline at their pleasure. +// Done correctly, the only time we'd need to cross to native could would be +// to park() or unpark() threads. We'd also need a few more unsafe operators +// to (a) prevent compiler-JIT reordering of non-volatile accesses, and +// (b) explicit barriers or fence operations. +// +// TODO: +// +// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). +// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. +// Given TLAB allocation, Self is usually manifested in a register, so passing it into +// the lock operators would typically be faster than reifying Self. +// +// * Ideally I'd define the primitives as: +// fast_lock (nax Obj, nax box, tmp, nax scr) where box, tmp and scr are KILLED. +// fast_unlock (nax Obj, box, nax tmp) where box and tmp are KILLED +// Unfortunately ADLC bugs prevent us from expressing the ideal form. +// Instead, we're stuck with a rather awkward and brittle register assignments below. +// Furthermore the register assignments are overconstrained, possibly resulting in +// sub-optimal code near the synchronization site. +// +// * Eliminate the sp-proximity tests and just use "== Self" tests instead. 
+// Alternately, use a better sp-proximity test. +// +// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. +// Either one is sufficient to uniquely identify a thread. +// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. +// +// * Intrinsify notify() and notifyAll() for the common cases where the +// object is locked by the calling thread but the waitlist is empty. +// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). +// +// * use jccb and jmpb instead of jcc and jmp to improve code density. +// But beware of excessive branch density on AMD Opterons. +// +// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success +// or failure of the fast-path. If the fast-path fails then we pass +// control to the slow-path, typically in C. In Fast_Lock and +// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 +// will emit a conditional branch immediately after the node. +// So we have branches to branches and lots of ICC.ZF games. +// Instead, it might be better to have C2 pass a "FailureLabel" +// into Fast_Lock and Fast_Unlock. In the case of success, control +// will drop through the node. ICC.ZF is undefined at exit. +// In the case of failure, the node will branch directly to the +// FailureLabel + + +// obj: object to lock +// box: on-stack box address (displaced header location) - KILLED +// tmp: tmp -- KILLED +// scr: tmp -- KILLED +void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) { + + // Ensure the register assignents are disjoint + guarantee (objReg != boxReg, "") ; + guarantee (objReg != tmpReg, "") ; + guarantee (objReg != scrReg, "") ; + guarantee (boxReg != tmpReg, "") ; + guarantee (boxReg != scrReg, "") ; + + + block_comment("FastLock"); + if (PrintBiasedLockingStatistics) { + push(tmpReg); + atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg); + pop(tmpReg); + } + + if (EmitSync & 1) { + move(AT, 0x0); + return; + } else + if (EmitSync & 2) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); + } + + ld(tmpReg, Address(objReg, 0)) ; // fetch markword + ori(tmpReg, tmpReg, 0x1); + sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + + cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg + bne(AT, R0, DONE_LABEL); + delayed()->nop(); + + // Recursive locking + dsubu(tmpReg, tmpReg, SP); + li(AT, (7 - os::vm_page_size() )); + andr(tmpReg, tmpReg, AT); + sd(tmpReg, Address(boxReg, 0)); + bind(DONE_LABEL) ; + } else { + // Possible cases that we'll encounter in fast_lock + // ------------------------------------------------ + // * Inflated + // -- unlocked + // -- Locked + // = by self + // = by other + // * biased + // -- by Self + // -- by other + // * neutral + // * stack-locked + // -- by self + // = sp-proximity test hits + // = sp-proximity test generates false-negative + // -- by other + // + + Label IsInflated, DONE_LABEL, PopDone ; + + // TODO: optimize away redundant LDs of obj->mark and improve the markword triage + // order to reduce the number of conditional branches in the most common cases. + // Beware -- there's a subtle invariant that fetch of the markword + // at [FETCH], below, will never observe a biased encoding (*101b). + // If this invariant is not held we risk exclusion (safety) failure. 
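+ //
+ // The resulting fast path is, in outline:
+ //   mark = obj->mark();
+ //   if (mark & monitor_value) goto IsInflated;        // inflated monitor
+ //   box->displaced_header = mark | unlocked_value;    // anticipate success
+ //   if (CAS(&obj->mark, mark | unlocked_value, box)) goto DONE;  // stack-locked
+ //   // otherwise: recursive stack-lock iff (mark - SP) stays within one page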
+ if (UseBiasedLocking && !UseOptoBiasInlining) { + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); + } + + ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object. + andi(AT, tmpReg, markOopDesc::monitor_value); + bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias + delayed()->nop(); + + // Attempt stack-locking ... + ori (tmpReg, tmpReg, markOopDesc::unlocked_value); + sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + //if (os::is_MP()) { + // sync(); + //} + + cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg + //AT == 1: unlocked + + if (PrintBiasedLockingStatistics) { + Label L; + beq(AT, R0, L); + delayed()->nop(); + push(T0); + push(T1); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); + pop(T1); + pop(T0); + bind(L); + } + bne(AT, R0, DONE_LABEL); + delayed()->nop(); + + // Recursive locking + // The object is stack-locked: markword contains stack pointer to BasicLock. + // Locked by current thread if difference with current SP is less than one page. + dsubu(tmpReg, tmpReg, SP); + li(AT, 7 - os::vm_page_size() ); + andr(tmpReg, tmpReg, AT); + sd(tmpReg, Address(boxReg, 0)); + if (PrintBiasedLockingStatistics) { + Label L; + // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ + bne(tmpReg, R0, L); + delayed()->nop(); + push(T0); + push(T1); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); + pop(T1); + pop(T0); + bind(L); + } + sltiu(AT, tmpReg, 1); // AT = (tmpReg == 0) ? 1 : 0 + + b(DONE_LABEL) ; + delayed()->nop(); + + bind(IsInflated) ; + // The object's monitor m is unlocked iff m->owner == NULL, + // otherwise m->owner may contain a thread or a stack address. + + // TODO: someday avoid the ST-before-CAS penalty by + // relocating (deferring) the following ST. + // We should also think about trying a CAS without having + // fetched _owner. If the CAS is successful we may + // avoid an RTO->RTS upgrade on the $line. + // Without cast to int32_t a movptr will destroy r10 which is typically obj + li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); + sd(AT, Address(boxReg, 0)); + + move(boxReg, tmpReg) ; + ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; + // if (m->owner != 0) => AT = 0, goto slow path. + move(AT, R0); + bne(tmpReg, R0, DONE_LABEL); + delayed()->nop(); + +#ifndef OPT_THREAD + get_thread (TREG) ; +#endif + // It's inflated and appears unlocked + //if (os::is_MP()) { + // sync(); + //} + cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ; + // Intentional fall-through into DONE_LABEL ... + + + // DONE_LABEL is a hot target - we'd really like to place it at the + // start of cache line by padding with NOPs. + // See the AMD and Intel software optimization manuals for the + // most efficient "long" NOP encodings. + // Unfortunately none of our alignment mechanisms suffice. + bind(DONE_LABEL); + + // At DONE_LABEL the AT is set as follows ... + // Fast_Unlock uses the same protocol. + // AT == 1 -> Success + // AT == 0 -> Failure - force control through the slow-path + + // Avoid branch-to-branch on AMD processors + // This appears to be superstition. + if (EmitSync & 32) nop() ; + + } +} + +// obj: object to unlock +// box: box address (displaced header location), killed. +// tmp: killed tmp; cannot be obj nor box. +// +// Some commentary on balanced locking: +// +// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. 
+// Methods that don't have provably balanced locking are forced to run in the +// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. +// The interpreter provides two properties: +// I1: At return-time the interpreter automatically and quietly unlocks any +// objects acquired the current activation (frame). Recall that the +// interpreter maintains an on-stack list of locks currently held by +// a frame. +// I2: If a method attempts to unlock an object that is not held by the +// the frame the interpreter throws IMSX. +// +// Lets say A(), which has provably balanced locking, acquires O and then calls B(). +// B() doesn't have provably balanced locking so it runs in the interpreter. +// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O +// is still locked by A(). +// +// The only other source of unbalanced locking would be JNI. The "Java Native Interface: +// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter +// should not be unlocked by "normal" java-level locking and vice-versa. The specification +// doesn't specify what will occur if a program engages in such mixed-mode locking, however. + +void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) { + + guarantee (objReg != boxReg, "") ; + guarantee (objReg != tmpReg, "") ; + guarantee (boxReg != tmpReg, "") ; + + block_comment("FastUnlock"); + + + if (EmitSync & 4) { + // Disable - inhibit all inlining. Force control through the slow-path + move(AT, 0x0); + return; + } else + if (EmitSync & 8) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + // classic stack-locking code ... + ld(tmpReg, Address(boxReg, 0)) ; + beq(tmpReg, R0, DONE_LABEL) ; + move(AT, 0x1); // delay slot + + cmpxchg(tmpReg, Address(objReg, 0), boxReg); + bind(DONE_LABEL); + } else { + Label DONE_LABEL, Stacked, CheckSucc, Inflated ; + + // Critically, the biased locking test must have precedence over + // and appear before the (box->dhw == 0) recursive stack-lock test. + if (UseBiasedLocking && !UseOptoBiasInlining) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + + ld(AT, Address(boxReg, 0)) ; // Examine the displaced header + beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock + delayed()->daddiu(AT, R0, 0x1); + + ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword + andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated? + beq(AT, R0, Stacked) ; // Inflated? + delayed()->nop(); + + bind(Inflated) ; + // It's inflated. + // Despite our balanced locking property we still check that m->_owner == Self + // as java routines or native JNI code called by this thread might + // have released the lock. + // Refer to the comments in synchronizer.cpp for how we might encode extra + // state in _succ so we can avoid fetching EntryList|cxq. + // + // I'd like to add more cases in fast_lock() and fast_unlock() -- + // such as recursive enter and exit -- but we have to be wary of + // I$ bloat, T$ effects and BP$ effects. + // + // If there's no contention try a 1-0 exit. That is, exit without + // a costly MEMBAR or CAS. See synchronizer.cpp for details on how + // we detect and recover from the race that the 1-0 exit admits. + // + // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier + // before it STs null into _owner, releasing the lock. 
Updates + // to data protected by the critical section must be visible before + // we drop the lock (and thus before any other thread could acquire + // the lock and observe the fields protected by the lock). +#ifndef OPT_THREAD + get_thread (TREG) ; +#endif + + // It's inflated + ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; + xorr(boxReg, boxReg, TREG); + + ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; + orr(boxReg, boxReg, AT); + + move(AT, R0); + bne(boxReg, R0, DONE_LABEL); + delayed()->nop(); + + ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; + ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; + orr(boxReg, boxReg, AT); + + move(AT, R0); + bne(boxReg, R0, DONE_LABEL); + delayed()->nop(); + + sync(); + sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; + move(AT, 0x1); + b(DONE_LABEL); + delayed()->nop(); + + bind (Stacked); + ld(tmpReg, Address(boxReg, 0)) ; + //if (os::is_MP()) { sync(); } + cmpxchg(tmpReg, Address(objReg, 0), boxReg); + + if (EmitSync & 65536) { + bind (CheckSucc); + } + + bind(DONE_LABEL); + + // Avoid branch to branch on AMD processors + if (EmitSync & 32768) { nop() ; } + } +} + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + + +void MacroAssembler::verify_FPU(int stack_depth, const char* s) { + //Unimplemented(); +} + +Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; +Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; + +//In MIPS64, F0~23 are all caller-saved registers +FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; + +// We preserve all caller-saved register +void MacroAssembler::pushad(){ + int i; + + // Fixed-point registers + int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) + { + sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) + { + sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } +}; + +void MacroAssembler::popad(){ + int i; + + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) + { + ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + for (i = 0; i < len; i++) + { + ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); +}; + +// We preserve all caller-saved register except V0 +void MacroAssembler::pushad_except_v0() { + int i; + + // Fixed-point registers + int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * 
wordSize); + } +} + +void MacroAssembler::popad_except_v0() { + int i; + + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) { + ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + for (i = 0; i < len; i++) { + ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); +} + +void MacroAssembler::push2(Register reg1, Register reg2) { + daddiu(SP, SP, -16); + sd(reg1, SP, 8); + sd(reg2, SP, 0); +} + +void MacroAssembler::pop2(Register reg1, Register reg2) { + ld(reg1, SP, 8); + ld(reg2, SP, 0); + daddiu(SP, SP, 16); +} + +// for UseCompressedOops Option +void MacroAssembler::load_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else + ld(dst, src, oopDesc::klass_offset_in_bytes()); +} + +void MacroAssembler::store_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + encode_klass_not_null(src); + sw(src, dst, oopDesc::klass_offset_in_bytes()); + } else { + sd(src, dst, oopDesc::klass_offset_in_bytes()); + } +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld(dst, Address(dst, Klass::prototype_header_offset())); +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedClassPointers) { + sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); + } +} + +void MacroAssembler::load_heap_oop(Register dst, Address src) { + if(UseCompressedOops){ + lwu(dst, src); + decode_heap_oop(dst); + } else { + ld(dst, src); + } +} + +void MacroAssembler::store_heap_oop(Address dst, Register src){ + if(UseCompressedOops){ + assert(!dst.uses(src), "not enough registers"); + encode_heap_oop(src); + sw(src, dst); + } else { + sd(src, dst); + } +} + +void MacroAssembler::store_heap_oop_null(Address dst){ + if(UseCompressedOops){ + sw(R0, dst); + } else { + sd(R0, dst); + } +} + +#ifdef ASSERT +void MacroAssembler::verify_heapbase(const char* msg) { + assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); +} +#endif + + +// Algorithm must match oop.inline.hpp encode_heap_oop. 
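+// Roughly: narrow = (oop == NULL) ? 0 : (oop - narrow_oop_base) >> narrow_oop_shift.
+// The movz below substitutes the heap base for a NULL register before the
+// subtraction, so NULL still encodes to 0.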
+void MacroAssembler::encode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(r, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } + return; + } + + movz(r, S5_heapbase, r); + dsubu(r, r, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(src, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + dsrl(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) move(dst, src); + } + } else { + if (dst == src) { + movz(dst, S5_heapbase, dst); + dsubu(dst, dst, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } + } else { + dsubu(dst, src, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } + movz(dst, R0, src); + } + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register r) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(r, R0, ok); + delayed()->nop(); + stop("null oop passed to encode_heap_oop_not_null"); + bind(ok); + } +#endif + verify_oop(r, "broken oop in encode_heap_oop_not_null"); + if (Universe::narrow_oop_base() != NULL) { + dsubu(r, r, S5_heapbase); + } + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } + +} + +void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(src, R0, ok); + delayed()->nop(); + stop("null oop passed to encode_heap_oop_not_null2"); + bind(ok); + } +#endif + verify_oop(src, "broken oop in encode_heap_oop_not_null2"); + + if (Universe::narrow_oop_base() != NULL) { + dsubu(dst, src, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } + } else { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + dsrl(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) move(dst, src); + } + } +} + +void MacroAssembler::decode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + } + } else { + move(AT, r); + if 
(Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + } + daddu(r, r, S5_heapbase); + movz(r, R0, AT); + } + verify_oop(r, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (dst != src) nop(); // DON'T DELETE THIS GUY. + dsll(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) move(dst, src); + } + } else { + if (dst == src) { + move(AT, dst); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(dst, LogMinObjAlignmentInBytes); + } + daddu(dst, dst, S5_heapbase); + movz(dst, R0, AT); + } else { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + dsll(dst, src, LogMinObjAlignmentInBytes); + daddu(dst, dst, S5_heapbase); + } else { + daddu(dst, src, S5_heapbase); + } + movz(dst, R0, src); + } + } + verify_oop(dst, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + // Note: it will change flags + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + if (Universe::narrow_oop_base() != NULL) { + daddu(r, r, S5_heapbase); + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
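+ //
+ // Decoding is the inverse of encoding: oop = (narrow << shift) + base, where
+ // adding the base is safe here because the narrow oop is known to be
+ // non-NULL, so no movz-based NULL special case is needed in this variant.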
+ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes == Address::times_8) { + dsll(dst, src, LogMinObjAlignmentInBytes); + daddu(dst, dst, S5_heapbase); + } else { + dsll(dst, src, LogMinObjAlignmentInBytes); + if (Universe::narrow_oop_base() != NULL) { + daddu(dst, dst, S5_heapbase); + } + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + if (dst != src) { + move(dst, src); + } + } +} + +void MacroAssembler::encode_klass_not_null(Register r) { + if (Universe::narrow_klass_base() != NULL) { + assert(r != AT, "Encoding a klass in AT"); + set64(AT, (int64_t)Universe::narrow_klass_base()); + dsubu(r, r, AT); + } + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(r, LogKlassAlignmentInBytes); + } +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + if (dst == src) { + encode_klass_not_null(src); + } else { + if (Universe::narrow_klass_base() != NULL) { + set64(dst, (int64_t)Universe::narrow_klass_base()); + dsubu(dst, src, dst); + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(dst, LogKlassAlignmentInBytes); + } + } else { + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + dsrl(dst, src, LogKlassAlignmentInBytes); + } else { + move(dst, src); + } + } + } +} + +// Function instr_size_for_decode_klass_not_null() counts the instructions +// generated by decode_klass_not_null(register r) and reinit_heapbase(), +// when (Universe::heap() != NULL). Hence, if the instructions they +// generate change, then this method needs to be updated. +int MacroAssembler::instr_size_for_decode_klass_not_null() { + assert (UseCompressedClassPointers, "only for compressed klass ptrs"); + if (Universe::narrow_klass_base() != NULL) { + // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). + return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); + } else { + // longest load decode klass function, mov64, leaq + return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1); + } +} + +void MacroAssembler::decode_klass_not_null(Register r) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + assert(r != AT, "Decoding a klass in AT"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shl(r, LogKlassAlignmentInBytes); + } + if (Universe::narrow_klass_base() != NULL) { + set64(AT, (int64_t)Universe::narrow_klass_base()); + daddu(r, r, AT); + //Not neccessary for MIPS at all. + //reinit_heapbase(); + } +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + + if (dst == src) { + decode_klass_not_null(dst); + } else { + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. 
+ // Also do not verify_oop as this is called by verify_oop. + set64(dst, (int64_t)Universe::narrow_klass_base()); + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + dsll(AT, src, Address::times_8); + daddu(dst, dst, AT); + } else { + daddu(dst, src, dst); + } + } +} + +void MacroAssembler::incrementl(Register reg, int value) { + if (value == min_jint) { + move(AT, value); + addu32(reg, reg, AT); + return; + } + if (value < 0) { decrementl(reg, -value); return; } + if (value == 0) { ; return; } + + move(AT, value); + addu32(reg, reg, AT); +} + +void MacroAssembler::decrementl(Register reg, int value) { + if (value == min_jint) { + move(AT, value); + subu32(reg, reg, AT); + return; + } + if (value < 0) { incrementl(reg, -value); return; } + if (value == 0) { ; return; } + + move(AT, value); + subu32(reg, reg, AT); +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops || UseCompressedClassPointers) { + if (Universe::heap() != NULL) { + if (Universe::narrow_oop_base() == NULL) { + move(S5_heapbase, R0); + } else { + set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); + } + } else { + set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); + ld(S5_heapbase, S5_heapbase, 0); + } + } +} + +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { +//implement ind gen_subtype_check + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + +SkipIfEqual::SkipIfEqual( + MacroAssembler* masm, const bool* flag_addr, bool value) { + _masm = masm; + _masm->li(AT, (address)flag_addr); + _masm->lb(AT, AT, 0); + _masm->addiu(AT, AT, -value); + _masm->beq(AT, R0, _label); + _masm->delayed()->nop(); +} +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. 
+ // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + beq(sub_klass, super_klass, *L_success); + delayed()->nop(); + // Check the supertype display: + if (must_load_sco) { + lwu(temp_reg, super_klass, sco_offset); + super_check_offset = RegisterOrConstant(temp_reg); + } + daddu(AT, sub_klass, super_check_offset.register_or_noreg()); + ld(AT, AT, super_check_offset.constant_or_zero()); + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) + // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + beq(super_klass, AT, *L_success); + delayed()->nop(); + addiu(AT, super_check_offset.as_register(), -sc_offset); + if (L_failure == &L_fallthrough) { + beq(AT, R0, *L_slow_path); + delayed()->nop(); + } else { + bne_far(AT, R0, *L_failure); + delayed()->nop(); + b(*L_slow_path); + delayed()->nop(); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + beq(super_klass, AT, *L_success); + delayed()->nop(); + } else { + bne(super_klass, AT, *L_slow_path); + delayed()->nop(); + b(*L_success); + delayed()->nop(); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + beq(super_klass, AT, *L_success); + delayed()->nop(); + } else { + bne_far(super_klass, AT, *L_failure); + delayed()->nop(); + b(*L_success); + delayed()->nop(); + } + } + + bind(L_fallthrough); + +} + + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + if (temp2_reg == noreg) + temp2_reg = TSR; + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); + Address super_cache_addr( sub_klass, sc_offset); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + // The repne_scan instruction uses fixed registers, which we must spill. + // Don't worry too much about pre-existing connections with the input regs. 
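+  // Note: the repne_scan mentioned above is an x86-ism with no MIPS/LoongArch
+  // counterpart; the scan that follows is an explicit beq/daddiu loop over the
+  // secondary_supers array, counting elements down in temp2_reg.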
+ +#ifndef PRODUCT + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; + ExternalAddress pst_counter_addr((address) pst_counter); +#endif //PRODUCT + + // We will consult the secondary-super array. + ld(temp_reg, secondary_supers_addr); + // Load the array length. + lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); + // Skip to start of data. + daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); + + // OpenJDK8 never compresses klass pointers in secondary-super array. + Label Loop, subtype; + bind(Loop); + beq(temp2_reg, R0, *L_failure); + delayed()->nop(); + ld(AT, temp_reg, 0); + beq(AT, super_klass, subtype); + delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); + b(Loop); + delayed()->daddiu(temp2_reg, temp2_reg, -1); + + bind(subtype); + sd(super_klass, super_cache_addr); + if (L_success != &L_fallthrough) { + b(*L_success); + delayed()->nop(); + } + + // Success. Cache the super we found and proceed in triumph. +#undef IS_A_TEMP + + bind(L_fallthrough); +} + +void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { + ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); + sd(R0, Address(java_thread, JavaThread::vm_result_offset())); + verify_oop(oop_result, "broken oop in call_VM_base"); +} + +void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { + ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); + sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); +} + +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). + int stackElementSize = Interpreter::stackElementSize; + int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); +#ifdef ASSERT + int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); + assert(offset1 - offset == stackElementSize, "correct arithmetic"); +#endif + Register scale_reg = NOREG; + Address::ScaleFactor scale_factor = Address::no_scale; + if (arg_slot.is_constant()) { + offset += arg_slot.as_constant() * stackElementSize; + } else { + scale_reg = arg_slot.as_register(); + scale_factor = Address::times_8; + } + // We don't push RA on stack in prepare_invoke. + // offset += wordSize; // return PC is on stack + if(scale_reg==NOREG) return Address(SP, offset); + else { + dsll(scale_reg, scale_reg, scale_factor); + daddu(scale_reg, SP, scale_reg); + return Address(scale_reg, offset); + } +} + +SkipIfEqual::~SkipIfEqual() { + _masm->bind(_label); +} + +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { + switch (size_in_bytes) { + case 8: ld(dst, src); break; + case 4: lw(dst, src); break; + case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; + case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { + switch (size_in_bytes) { + case 8: sd(src, dst); break; + case 4: sw(src, dst); break; + case 2: sh(src, dst); break; + case 1: sb(src, dst); break; + default: ShouldNotReachHere(); + } +} + +// Look up the method for a megamorphic invokeinterface call. +// The target method is determined by . +// The receiver klass is in recv_klass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. 
+void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& L_no_such_interface, + bool return_method) { + assert_different_registers(recv_klass, intf_klass, scan_temp, AT); + assert_different_registers(method_result, intf_klass, scan_temp, AT); + assert(recv_klass != method_result || !return_method, + "recv_klass can be destroyed when method isn't needed"); + + assert(itable_index.is_constant() || itable_index.as_register() == method_result, + "caller must use same register for non-constant itable index as for method"); + + // Compute start of first itableOffsetEntry (which is at the end of the vtable) + int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; + int itentry_off = itableMethodEntry::method_offset_in_bytes(); + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size() * wordSize; + Address::ScaleFactor times_vte_scale = Address::times_ptr; + assert(vte_size == wordSize, "else adjust times_vte_scale"); + + lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); + + // %%% Could store the aligned, prescaled offset in the klassoop. + dsll(scan_temp, scan_temp, times_vte_scale); + daddu(scan_temp, recv_klass, scan_temp); + daddiu(scan_temp, scan_temp, vtable_base); + if (HeapWordsPerLong > 1) { + // Round up to align_object_offset boundary + // see code for InstanceKlass::start_of_itable! + round_to(scan_temp, BytesPerLong); + } + + if (return_method) { + // Adjust recv_klass by scaled itable_index, so we can free itable_index. + assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + if (itable_index.is_constant()) { + set64(AT, (int)itable_index.is_constant()); + dsll(AT, AT, (int)Address::times_ptr); + } else { + dsll(AT, itable_index.as_register(), (int)Address::times_ptr); + } + daddu(AT, AT, recv_klass); + daddiu(recv_klass, AT, itentry_off); + } + + Label search, found_method; + + for (int peel = 1; peel >= 0; peel--) { + ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + + if (peel) { + beq(intf_klass, method_result, found_method); + delayed()->nop(); + } else { + bne(intf_klass, method_result, search); + delayed()->nop(); + // (invert the test to fall through to found_method...) + } + + if (!peel) break; + + bind(search); + + // Check that the previous entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + beq(method_result, R0, L_no_such_interface); + delayed()->nop(); + daddiu(scan_temp, scan_temp, scan_step); + } + + bind(found_method); + + if (return_method) { + // Got a hit. 
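+    // The matched itableOffsetEntry's offset field is the byte distance from
+    // the start of the klass to that interface's block of itableMethodEntries;
+    // recv_klass was pre-biased above by the scaled itable_index plus
+    // itentry_off, so one indexed load yields the Method*.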
+ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + if (UseLEXT1) { + gsldx(method_result, recv_klass, scan_temp, 0); + } else { + daddu(AT, recv_klass, scan_temp); + ld(method_result, AT, 0); + } + } +} + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + Register tmp = GP; + push(tmp); + + if (vtable_index.is_constant()) { + assert_different_registers(recv_klass, method_result, tmp); + } else { + assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); + } + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); + if (vtable_index.is_constant()) { + set64(AT, vtable_index.as_constant()); + dsll(AT, AT, (int)Address::times_ptr); + } else { + dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); + } + set64(tmp, base + vtableEntry::method_offset_in_bytes()); + daddu(tmp, tmp, AT); + daddu(tmp, tmp, recv_klass); + ld(method_result, tmp, 0); + + pop(tmp); +} + +void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { + switch (type) { + case T_LONG: + st_ptr(src_reg, tmp_reg, disp); + break; + case T_ARRAY: + case T_OBJECT: + if (UseCompressedOops && !wide) { + sw(src_reg, tmp_reg, disp); + } else { + st_ptr(src_reg, tmp_reg, disp); + } + break; + case T_ADDRESS: + st_ptr(src_reg, tmp_reg, disp); + break; + case T_INT: + sw(src_reg, tmp_reg, disp); + break; + case T_CHAR: + case T_SHORT: + sh(src_reg, tmp_reg, disp); + break; + case T_BYTE: + case T_BOOLEAN: + sb(src_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type, wide); + } +} + +void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { + switch (type) { + case T_DOUBLE: + sdc1(src_reg, tmp_reg, disp); + break; + case T_FLOAT: + swc1(src_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); + } +} + +void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { + switch (type) { + case T_LONG: + ld_ptr(dst_reg, tmp_reg, disp); + break; + case T_ARRAY: + case T_OBJECT: + if (UseCompressedOops && !wide) { + lwu(dst_reg, tmp_reg, disp); + } else { + ld_ptr(dst_reg, tmp_reg, disp); + } + break; + case T_ADDRESS: + if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { + lwu(dst_reg, tmp_reg, disp); + } else { + ld_ptr(dst_reg, tmp_reg, disp); + } + break; + case T_INT: + lw(dst_reg, tmp_reg, disp); + break; + case T_CHAR: + lhu(dst_reg, tmp_reg, disp); + break; + case T_SHORT: + lh(dst_reg, tmp_reg, disp); + break; + case T_BYTE: + case T_BOOLEAN: + lb(dst_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { + int code_offset = 0; + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type, wide); + } + + return code_offset; +} + +void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { + switch (type) { + case T_DOUBLE: + ldc1(dst_reg, tmp_reg, disp); + break; + case T_FLOAT: + lwc1(dst_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { + int code_offset = 0; + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); + } + + return code_offset; +} + +void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { + const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); + STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code + // The inverted mask is sign-extended + move(AT, inverted_jweak_mask); + andr(possibly_jweak, AT, possibly_jweak); +} + +void MacroAssembler::resolve_jobject(Register value, + Register thread, + Register tmp) { + assert_different_registers(value, thread, tmp); + Label done, not_weak; + beq(value, R0, done); // Use NULL as-is. + delayed()->nop(); + move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. + andr(AT, value, AT); + beq(AT, R0, not_weak); + delayed()->nop(); + // Resolve jweak. + ld(value, value, -JNIHandles::weak_tag_value); + verify_oop(value); + #if INCLUDE_ALL_GCS + if (UseG1GC) { + g1_write_barrier_pre(noreg /* obj */, + value /* pre_val */, + thread /* thread */, + tmp /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } + #endif // INCLUDE_ALL_GCS + b(done); + delayed()->nop(); + bind(not_weak); + // Resolve (untagged) jobject. 
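+  // A strong jobject carries no tag bit, so the oop is loaded directly from the
+  // handle; the weak path above subtracted weak_tag_value before its load and,
+  // under G1, issued a SATB pre-barrier to keep the referent alive.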
+ ld(value, value, 0); + verify_oop(value); + bind(done); +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp, + bool is_signed) { + switch (cmp) { + case EQ: + subu(AT, op1, op2); + movz(dst, src, AT); + break; + + case NE: + subu(AT, op1, op2); + movn(dst, src, AT); + break; + + case GT: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + movn(dst, src, AT); + break; + + case GE: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + movz(dst, src, AT); + break; + + case LT: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + movn(dst, src, AT); + break; + + case LE: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + movz(dst, src, AT); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + CMCompare cmp, + bool is_float) { + switch(cmp) { + case EQ: + if (is_float) { + c_eq_s(op1, op2); + } else { + c_eq_d(op1, op2); + } + movt(dst, src); + break; + + case NE: + if (is_float) { + c_eq_s(op1, op2); + } else { + c_eq_d(op1, op2); + } + movf(dst, src); + break; + + case GT: + if (is_float) { + c_ule_s(op1, op2); + } else { + c_ule_d(op1, op2); + } + movf(dst, src); + break; + + case GE: + if (is_float) { + c_ult_s(op1, op2); + } else { + c_ult_d(op1, op2); + } + movf(dst, src); + break; + + case LT: + if (is_float) { + c_ult_s(op1, op2); + } else { + c_ult_d(op1, op2); + } + movt(dst, src); + break; + + case LE: + if (is_float) { + c_ule_s(op1, op2); + } else { + c_ule_d(op1, op2); + } + movt(dst, src); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp, + bool is_float) { + switch(cmp) { + case EQ: + if (!is_float) { + c_eq_d(op1, op2); + movt_d(dst, src); + } else { + c_eq_s(op1, op2); + movt_s(dst, src); + } + break; + + case NE: + if (!is_float) { + c_eq_d(op1, op2); + movf_d(dst, src); + } else { + c_eq_s(op1, op2); + movf_s(dst, src); + } + break; + + case GT: + if (!is_float) { + c_ule_d(op1, op2); + movf_d(dst, src); + } else { + c_ule_s(op1, op2); + movf_s(dst, src); + } + break; + + case GE: + if (!is_float) { + c_ult_d(op1, op2); + movf_d(dst, src); + } else { + c_ult_s(op1, op2); + movf_s(dst, src); + } + break; + + case LT: + if (!is_float) { + c_ult_d(op1, op2); + movt_d(dst, src); + } else { + c_ult_s(op1, op2); + movt_s(dst, src); + } + break; + + case LE: + if (!is_float) { + c_ule_d(op1, op2); + movt_d(dst, src); + } else { + c_ule_s(op1, op2); + movt_s(dst, src); + } + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp, + bool is_float) { + Label L; + + switch(cmp) { + case EQ: + bne(op1, op2, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case NE: + beq(op1, op2, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case GT: + slt(AT, op2, op1); + beq(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case GE: + slt(AT, op1, op2); + bne(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + 
+ case LT: + slt(AT, op1, op2); + beq(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case LE: + slt(AT, op2, op1); + bne(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + default: + Unimplemented(); + } +} diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp new file mode 100644 index 00000000000..ab9727793f4 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp @@ -0,0 +1,701 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP + +#include "asm/assembler.hpp" +#include "utilities/macros.hpp" +#include "runtime/rtmLocking.hpp" + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +class MacroAssembler: public Assembler { + friend class LIR_Assembler; + friend class Runtime1; // as_Address() + + public: + // Compare code + typedef enum { + EQ = 0x01, + NE = 0x02, + GT = 0x03, + GE = 0x04, + LT = 0x05, + LE = 0x06 + } CMCompare; + + protected: + + Address as_Address(AddressLiteral adr); + Address as_Address(ArrayAddress adr); + + // Support for VM calls + // + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). +#ifdef CC_INTERP + // c++ interpreter never wants to use interp_masm version of call_VM + #define VIRTUAL +#else + #define VIRTUAL virtual +#endif + + VIRTUAL void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments // the number of arguments to pop after the call + ); + + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base + // returns the register which contains the thread upon return. 
If a thread register has been + // specified, the return value will correspond to that register. If no last_java_sp is specified + // (noreg) than sp will be used instead. + VIRTUAL void call_VM_base( // returns the register containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call + bool check_exceptions // whether to check for pending exceptions after return + ); + + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); + + // helpers for FPU flag access + // tmp is a temporary register, if none is available use noreg + + public: + static intptr_t i[32]; + static float f[32]; + static void print(outputStream *s); + + static int i_offset(unsigned int k); + static int f_offset(unsigned int k); + + static void save_registers(MacroAssembler *masm); + static void restore_registers(MacroAssembler *masm); + + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + + void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. + void pd_patch_instruction(address branch, address target); + + address emit_trampoline_stub(int insts_call_instruction_offset, address target); + + // Support for inc/dec with optimal instruction selection depending on value + void incrementl(Register reg, int value = 1); + void decrementl(Register reg, int value = 1); + + + // Alignment + void align(int modulus); + + + // Stack frame creation/removal + void enter(); + void leave(); + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
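+  // Illustrative use only (the entry point name below is hypothetical):
+  //   __ call_VM(V0, CAST_FROM_FN_PTR(address, InterpreterRuntime::some_entry), A1);
+  // This sets up last_Java_frame, passes the thread (TREG by default) as the
+  // first C argument, and checks for a pending exception on return unless
+  // check_exceptions is false.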
+ + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, bool + check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result (Register oop_result, Register thread); + void get_vm_result_2(Register metadata_result, Register thread); + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2, Register arg_3); + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register thread, + Register last_java_sp, + Register last_java_fp, + address last_java_pc); + + // thread in the default location (S6) + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc); + + void reset_last_Java_frame(Register thread, bool clear_fp); + + // thread in the default location (S6) + void reset_last_Java_frame(bool clear_fp); + + // Stores + void store_check(Register obj); // store check for obj - register is destroyed afterwards + void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) + + void resolve_jobject(Register value, Register thread, Register tmp); + void clear_jweak_tag(Register possibly_jweak); + +#if INCLUDE_ALL_GCS + + void g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + +#endif // INCLUDE_ALL_GCS + + // split store_check(Register obj) to enhance instruction interleaving + void store_check_part_1(Register obj); + void store_check_part_2(Register obj); + + // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 + void c2bool(Register x); + //add for compressedoops + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + void load_prototype_header(Register dst, Register src); + + void store_klass_gap(Register dst, Register src); + + void load_heap_oop(Register dst, Address src); + void store_heap_oop(Address dst, Register src); + void store_heap_oop_null(Address dst); + void encode_heap_oop(Register r); + void encode_heap_oop(Register dst, Register src); + void decode_heap_oop(Register r); + void decode_heap_oop(Register dst, Register src); + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop_not_null(Register dst, Register src); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register dst, Register src); + + // Returns the byte size of the instructions generated by decode_klass_not_null() + // when compressed klass pointers are being used. + static int instr_size_for_decode_klass_not_null(); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + DEBUG_ONLY(void verify_heapbase(const char* msg);) + + void set_narrow_klass(Register dst, Klass* k); + void set_narrow_oop(Register dst, jobject obj); + + + + + // Sign extension + void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } + void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } + void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + + void trigfunc(char trig, int num_fpu_regs_in_use = 1); + // allocation + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); + void incr_allocated_bytes(Register thread, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. 
+ // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + + + // Debugging + + // only if +VerifyOops + void verify_oop(Register reg, const char* s = "broken oop"); + void verify_oop_addr(Address addr, const char * s = "broken oop addr"); + void verify_oop_subroutine(); + // TODO: verify method and klass metadata (compare against vptr?) + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + + #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) + #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyFPU + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + + // prints msg and continues + void warn(const char* msg); + + static void debug(char* msg/*, RegistersForDebugging* regs*/); + static void debug64(char* msg, int64_t pc, int64_t regs[]); + + void print_reg(Register reg); + void print_reg(FloatRegister reg); + + void untested() { stop("untested"); } + + void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, sizeof(b), "unimplemented: %s", what); stop(b); } + + void should_not_reach_here() { stop("should not reach here"); } + + void print_CPU_state(); + + // Stack overflow checking + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + if (offset <= 32768) { + sw(RA0, SP, -offset); + } else { + li(AT, offset); + dsubu(AT, SP, AT); + sw(RA0, AT, 0); + } + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Also, clobbers tmp + void bang_stack_size(Register size, Register tmp); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp); + + //void verify_tlab(); + void verify_tlab(Register t1, Register t2); + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // tmp_reg is optional. 
If it is supplied (i.e., != noreg) it will + // be killed; if not supplied, push/pop will be used internally to + // allocate a temporary (inefficient, avoid if possible). + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. + int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); +#ifdef COMPILER2 + void fast_lock(Register obj, Register box, Register tmp, Register scr); + void fast_unlock(Register obj, Register box, Register tmp); +#endif + + + // Arithmetics + // Regular vs. d* versions + inline void addu_long(Register rd, Register rs, Register rt) { + daddu(rd, rs, rt); + } + inline void addu_long(Register rd, Register rs, long imm32_64) { + daddiu(rd, rs, imm32_64); + } + + void round_to(Register reg, int modulus) { + assert_different_registers(reg, AT); + increment(reg, modulus - 1); + move(AT, - modulus); + andr(reg, reg, AT); + } + + // the follow two might use AT register, be sure you have no meanful data in AT before you call them + void increment(Register reg, int imm); + void decrement(Register reg, int imm); + + void shl(Register reg, int sa) { dsll(reg, reg, sa); } + void shr(Register reg, int sa) { dsrl(reg, reg, sa); } + void sar(Register reg, int sa) { dsra(reg, reg, sa); } + + // Helper functions for statistics gathering. 
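+  // atomic_inc32(counter_addr, inc, tmp1, tmp2) bumps the 32-bit counter at
+  // counter_addr by inc, using the two temporaries as scratch; it is intended
+  // only for the statistics counters mentioned above.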
+ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); + + // Calls + void call(address entry); + void call(address entry, relocInfo::relocType rtype); + void call(address entry, RelocationHolder& rh); + + address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); + + // Emit the CompiledIC call idiom + void ic_call(address entry); + + // Jumps + void jmp(address entry); + void jmp(address entry, relocInfo::relocType rtype); + void jmp_far(Label& L); // always long jumps + + /* branches may exceed 16-bit offset */ + void b_far(address entry); + void b_far(Label& L); + + void bne_far (Register rs, Register rt, address entry); + void bne_far (Register rs, Register rt, Label& L); + + void beq_far (Register rs, Register rt, address entry); + void beq_far (Register rs, Register rt, Label& L); + + // For C2 to support long branches + void beq_long (Register rs, Register rt, Label& L); + void bne_long (Register rs, Register rt, Label& L); + void bc1t_long (Label& L); + void bc1f_long (Label& L); + + void patchable_call(address target); + void general_call(address target); + + void patchable_jump(address target); + void general_jump(address target); + + static int insts_for_patchable_call(address target); + static int insts_for_general_call(address target); + + static int insts_for_patchable_jump(address target); + static int insts_for_general_jump(address target); + + // Floating + // Data + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs + inline void ld_ptr(Register rt, Address a) { + ld(rt, a); + } + + inline void ld_ptr(Register rt, Register base, int offset16) { + ld(rt, base, offset16); + } + + // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs + inline void st_ptr(Register rt, Address a) { + sd(rt, a); + } + + inline void st_ptr(Register rt, Register base, int offset16) { + sd(rt, base, offset16); + } + + void ld_ptr(Register rt, Register base, Register offset); + void st_ptr(Register rt, Register base, Register offset); + + // swap the two byte of the low 16-bit halfword + // this directive will use AT, be sure the high 16-bit of reg is zero + void hswap(Register reg); + void huswap(Register reg); + + // convert big endian integer to little endian integer + void swap(Register reg); + + // implement the x86 instruction semantic + // if c_reg == *dest then *dest <= x_reg + // else c_reg <= *dest + // the AT indicate if xchg occurred, 1 for xchged, else 0 + void cmpxchg(Register x_reg, Address dest, Register c_reg); + void cmpxchg32(Register x_reg, Address dest, Register c_reg); + void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); + + //pop & push + void extend_sign(Register rh, Register rl) { stop("extend_sign"); } + void neg(Register reg) { dsubu(reg, R0, reg); } + void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } + void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } + void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } + void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } + void pop () { daddiu(SP, SP, 8); } + void pop2 () { daddiu(SP, SP, 16); } + void push2(Register reg1, Register reg2); + void pop2 (Register reg1, Register reg2); + void 
dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } + void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } + //we need 2 fun to save and resotre general register + void pushad(); + void popad(); + void pushad_except_v0(); + void popad_except_v0(); + + //move an 32-bit immediate to Register + void move(Register reg, int imm32) { li32(reg, imm32); } + void li (Register rd, long imm); + void li (Register rd, address addr) { li(rd, (long)addr); } + //replace move(Register reg, int imm) + void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 + void set64(Register d, jlong value); + static int insts_for_set64(jlong value); + + void patchable_set48(Register d, jlong value); + void patchable_set32(Register d, jlong value); + + void patchable_call32(Register d, jlong value); + + static int call_size(address target, bool far, bool patchable); + + static bool reachable_from_cache(address target); + static bool reachable_from_cache(); + + + void dli(Register rd, long imm) { li(rd, imm); } + void li64(Register rd, long imm); + void li48(Register rd, long imm); + + void move(Register rd, Register rs) { daddu(rd, rs, R0); } + void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } + void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } + void mov_metadata(Register dst, Metadata* obj); + void mov_metadata(Address dst, Metadata* obj); + + void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); + void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); + void store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); + void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); + void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); + void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); + int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); + int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); + +#ifndef PRODUCT + static void pd_print_patched_instruction(address branch) { + jint stub_inst = *(jint*) branch; + print_instruction(stub_inst); + ::tty->print("%s", " (unresolved)"); + + } +#endif + + //FIXME + void empty_FPU_stack(){/*need implemented*/}; + + + // method handles (JSR 292) + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + // Conditional move + void cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp = EQ, + bool is_signed = true); + void cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + CMCompare cmp = EQ, + bool is_float = true); + void cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp = EQ, + bool is_float = true); + void cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp = EQ, + bool is_float = true); + +#undef VIRTUAL + +}; + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and it's + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. 
+ */ +class SkipIfEqual { + private: + MacroAssembler* _masm; + Label _label; + + public: + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); + ~SkipIfEqual(); +}; + +#ifdef ASSERT +inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } +#endif + + +#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp new file mode 100644 index 00000000000..92c05fb726a --- /dev/null +++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP diff --git a/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp b/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp new file mode 100644 index 00000000000..0c467df2f38 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "memory/metaspaceShared.hpp" + +// Generate the self-patching vtable method: +// +// This method will be called (as any other Klass virtual method) with +// the Klass itself as the first argument. Example: +// +// oop obj; +// int size = obj->klass()->klass_part()->oop_size(this); +// +// for which the virtual method call is Klass::oop_size(); +// +// The dummy method is called with the Klass object as the first +// operand, and an object as the second argument. +// + +//===================================================================== + +// All of the dummy methods in the vtable are essentially identical, +// differing only by an ordinal constant, and they bear no releationship +// to the original method which the caller intended. Also, there needs +// to be 'vtbl_list_size' instances of the vtable in order to +// differentiate between the 'vtable_list_size' original Klass objects. + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +void MetaspaceShared::generate_vtable_methods(void** vtbl_list, + void** vtable, + char** md_top, + char* md_end, + char** mc_top, + char* mc_end) { + + intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); + *(intptr_t *)(*md_top) = vtable_bytes; + *md_top += sizeof(intptr_t); + void** dummy_vtable = (void**)*md_top; + *vtable = dummy_vtable; + *md_top += vtable_bytes; + + // Get ready to generate dummy methods. + + CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); + MacroAssembler* masm = new MacroAssembler(&cb); + + Label common_code; + for (int i = 0; i < vtbl_list_size; ++i) { + for (int j = 0; j < num_virtuals; ++j) { + dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); + + // Load V0 with a value indicating vtable/offset pair. + // -- bits[ 7..0] (8 bits) which virtual method in table? + // -- bits[12..8] (5 bits) which virtual method table? + // -- must fit in 13-bit instruction immediate field. + __ move(V0, (i << 8) + j); + __ b(common_code); + __ delayed()->nop(); + } + } + + __ bind(common_code); + + __ srl(T9, V0, 8); // isolate vtable identifier. + __ shl(T9, LogBytesPerWord); + __ li(AT, (long)vtbl_list); + __ addu(T9, AT, T9); + __ ld(T9, T9, 0); // get correct vtable address. + __ sd(T9, A0, 0); // update vtable pointer. + + __ andi(V0, V0, 0x00ff); // isolate vtable method index + __ shl(V0, LogBytesPerWord); + __ addu(T9, T9, V0); + __ ld(T9, T9, 0); // address of real method pointer. + __ jr(T9); // get real method pointer. + __ delayed()->nop(); + + __ flush(); + + *mc_top = (char*)__ pc(); +} diff --git a/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp b/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp new file mode 100644 index 00000000000..428c2713621 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp @@ -0,0 +1,576 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
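[Editor's sketch, not part of the patch] The V0 encoding used by the dummy vtable stubs above, written as plain standalone C++ so the bit layout is explicit; the 13-bit limit comes from the instruction-immediate constraint noted in the generator comment.

    #include <cassert>
    #include <cstdint>

    // bits[ 7..0] = which virtual method in the table, bits[12..8] = which table.
    static inline uint32_t encode_vtable_slot(int table, int method) {
      assert(table < 32 && method < 256);          // must fit the 13-bit immediate
      return (uint32_t)((table << 8) + method);    // value the stub moves into V0
    }

    static inline void decode_vtable_slot(uint32_t v0, int* table, int* method) {
      *table  = (int)(v0 >> 8);      // mirrors: srl  T9, V0, 8
      *method = (int)(v0 & 0x00ff);  // mirrors: andi V0, V0, 0x00ff
    }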
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) block_comment(error); __ stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + if (VerifyMethodHandles) + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, err_msg("%s should be nonzero", xname)); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //ASSERT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { + Label L; + BLOCK_COMMENT("verify_ref_kind {"); + __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); + __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); + __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); + __ andr(temp, temp, AT); + __ move(AT, ref_kind); + __ beq(temp, AT, L); + __ delayed()->nop(); + { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || + ref_kind == JVM_REF_invokeSpecial) + // could do this for all ref_kinds, but would explode assembly code size + trace_method_handle(_masm, buf); + __ STOP(buf); + } + BLOCK_COMMENT("} verify_ref_kind"); + __ bind(L); +} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { + assert(method == Rmethod, "interpreter calling convention"); + + Label L_no_such_method; + __ beq(method, R0, L_no_such_method); + __ delayed()->nop(); + + __ verify_method_ptr(method); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI 
events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + Register rthread = TREG; + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? + __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); + __ beq(AT, R0, run_compiled_code); + __ delayed()->nop(); + __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); + __ jr(T9); + __ delayed()->nop(); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ld(T9, method, in_bytes(entry_offset)); + __ jr(T9); + __ delayed()->nop(); + + __ bind(L_no_such_method); + address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); + __ jmp(wrong_method, relocInfo::runtime_call_type); + __ delayed()->nop(); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. + assert_different_registers(recv, method_temp, temp2); + assert(recv != noreg, "required register"); + assert(method_temp == Rmethod, "required register for loading method"); + + //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); + __ verify_oop(method_temp); + // the following assumes that a Method* is normally compressed in the vmtarget field: + __ ld(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ld(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, + Address(temp2, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + Address recv_addr = __ argument_address(temp2, -1); + __ ld(AT, recv_addr); + __ beq(recv, AT, L); + __ delayed()->nop(); + + recv_addr = __ argument_address(temp2, -1); + __ ld(V0, recv_addr); + __ STOP("receiver not on stack"); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. 
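[Editor's model, not VM source] The pointer chain that jump_to_lambda_form above walks before transferring control, with simplified stand-in types so the three loads are easy to follow.

    // MethodHandle.form -> LambdaForm.vmentry -> MemberName.vmtarget -> Method*
    struct Method       { void* from_interpreted_entry; void* from_compiled_entry; };
    struct MemberName   { Method* vmtarget; };
    struct LambdaForm   { MemberName* vmentry; };
    struct MethodHandle { LambdaForm* form; };

    static void* lambda_form_target(MethodHandle* recv, bool for_compiler_entry) {
      Method* m = recv->form->vmentry->vmtarget;      // the load_heap_oop/ld chain above
      return for_compiler_entry ? m->from_compiled_entry
                                : m->from_interpreted_entry;  // entry_offset choice
    }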
+ __ stop("empty stubs make SG sick"); + return NULL; + } + + // Rmethod: Method* + // T9: argument locator (parameter slot count, added to sp) + // S7: used as temp to hold mh or receiver + Register t9_argp = T9; // argument list ptr, live on error paths + Register s7_mh = S7; // MH receiver; dies quickly and is recycled + Register rm_method = Rmethod; // eventual target of this invocation + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ lbu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); + guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); + __ addiu(AT, AT, -1 * (int) iid); + __ beq(AT, R0, L); + __ delayed()->nop(); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ STOP("bad Method*::intrinsic_id"); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. + Address t9_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ld(t9_argp, Address(rm_method, Method::const_offset())); + __ load_sized_value(t9_argp, + Address(t9_argp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + t9_first_arg_addr = __ argument_address(t9_argp, -1); + } else { + DEBUG_ONLY(t9_argp = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ld(s7_mh, t9_first_arg_addr); + DEBUG_ONLY(t9_argp = noreg); + } + + // t9_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register r_recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. + __ ld(r_recv = T2, t9_first_arg_addr); + } + DEBUG_ONLY(t9_argp = noreg); + Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now + __ pop(rm_member); // extract last argument + generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); + } + + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + Register rm_method = Rmethod; // eventual target of this invocation + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register j_rarg0 = T0; + Register j_rarg1 = A0; + Register j_rarg2 = A1; + Register j_rarg3 = A2; + Register j_rarg4 = A3; + Register j_rarg5 = A4; + + Register temp1 = T8; + Register temp2 = T9; + Register temp3 = V0; + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? 
noreg : j_rarg0), "only valid assignment"); + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + } + else { + assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP + } + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + + if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz); + load_klass_from_Class(_masm, temp2_defc); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); + // If we get here, the type check failed! 
+ __ STOP("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ ld(rm_method, member_vmtarget); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ ld(rm_method, member_vmtarget); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ ld(temp2_index, member_vmindex); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ slt(AT, R0, temp2_index); + __ bne(AT, R0, L_index_ok); + __ delayed()->nop(); + __ STOP("no virtual index"); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. + + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf); + __ verify_klass_ptr(temp3_intf); + + Register rm_index = rm_method; + __ ld(rm_index, member_vmindex); + if (VerifyMethodHandles) { + Label L; + __ slt(AT, rm_index, R0); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ STOP("invalid vtable index for MH.invokeInterface"); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rm_index, rm_method, + temp2, + L_incompatible_class_change_error); + break; + } + + default: + fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + break; + } + + // Live at this point: + // rm_method + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that r_recv be shifted out. 
+ __ verify_method_ptr(rm_method); + jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); + __ jmp(icce_entry, relocInfo::runtime_call_type); + __ delayed()->nop(); + } + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oop mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { + // called as a leaf from native code: do not block the JVM! + bool has_mh = (strstr(adaptername, "/static") == NULL && + strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH + const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; + tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, + adaptername, mh_reg_name, + p2i(mh), p2i(entry_sp)); + + if (Verbose) { + tty->print_cr("Registers:"); + const int saved_regs_count = RegisterImpl::number_of_registers; + for (int i = 0; i < saved_regs_count; i++) { + Register r = as_Register(i); + // The registers are stored in reverse order on the stack (by pusha). + tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); + if ((i + 1) % 4 == 0) { + tty->cr(); + } else { + tty->print(", "); + } + } + tty->cr(); + + { + // dumping last frame with frame::describe + + JavaThread* p = JavaThread::active(); + + ResourceMark rm; + PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here + FrameValues values; + + // Note: We want to allow trace_method_handle from any call site. + // While trace_method_handle creates a frame, it may be entered + // without a PC on the stack top (e.g. not just after a call). + // Walking that frame could lead to failures due to that invalid PC. + // => carefully detect that frame when doing the stack walking + + // Current C frame + frame cur_frame = os::current_frame(); + + // Robust search of trace_calling_frame (independant of inlining). + // Assumes saved_regs comes from a pusha in the trace_calling_frame. + assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); + frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); + while (trace_calling_frame.fp() < saved_regs) { + trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); + } + + // safely create a frame and call frame::describe + intptr_t *dump_sp = trace_calling_frame.sender_sp(); + intptr_t *dump_fp = trace_calling_frame.link(); + + bool walkable = has_mh; // whether the traced frame shoud be walkable + + if (walkable) { + // The previous definition of walkable may have to be refined + // if new call sites cause the next frame constructor to start + // failing. Alternatively, frame constructors could be + // modified to support the current or future non walkable + // frames (but this is more intrusive and is not considered as + // part of this RFE, which will instead use a simpler output). 
+ frame dump_frame = frame(dump_sp, dump_fp); + dump_frame.describe(values, 1); + } else { + // Stack may not be walkable (invalid PC above FP): + // Add descriptions without building a Java frame to avoid issues + values.describe(-1, dump_fp, "fp for #1 "); + values.describe(-1, dump_sp, "sp for #1"); + } + values.describe(-1, entry_sp, "raw top of stack"); + + tty->print_cr("Stack layout:"); + values.print(p); + } + if (has_mh && mh->is_oop()) { + mh->print(); + if (java_lang_invoke_MethodHandle::is_instance(mh)) { + if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) + java_lang_invoke_MethodHandle::form(mh)->print(); + } + } + } +} + +// The stub wraps the arguments in a struct on the stack to avoid +// dealing with the different calling conventions for passing 6 +// arguments. +struct MethodHandleStubArguments { + const char* adaptername; + oopDesc* mh; + intptr_t* saved_regs; + intptr_t* entry_sp; +}; +void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { + trace_method_handle_stub(args->adaptername, + args->mh, + args->saved_regs, + args->entry_sp); +} + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { +} +#endif //PRODUCT diff --git a/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp b/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp new file mode 100644 index 00000000000..03b65fc8ef2 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. 
+ +// Adapters +enum /* platform_dependent_constants */ { + adapter_code_size = 32000 DEBUG_ONLY(+ 150000) +}; + +// Additional helper methods for MethodHandles code generation: +public: + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry); + + static Register saved_last_sp_register() { + // Should be in sharedRuntime, not here. + return I29; + } diff --git a/hotspot/src/cpu/mips/vm/mips.ad b/hotspot/src/cpu/mips/vm/mips.ad new file mode 100644 index 00000000000..3563bbe0e59 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/mips.ad @@ -0,0 +1,25 @@ +// +// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + diff --git a/hotspot/src/cpu/mips/vm/mips_64.ad b/hotspot/src/cpu/mips/vm/mips_64.ad new file mode 100644 index 00000000000..29125913a4f --- /dev/null +++ b/hotspot/src/cpu/mips/vm/mips_64.ad @@ -0,0 +1,14036 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// GodSon3 Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// archtecture. + +// format: +// reg_def name (call convention, c-call convention, ideal type, encoding); +// call convention : +// NS = No-Save +// SOC = Save-On-Call +// SOE = Save-On-Entry +// AS = Always-Save +// ideal type : +// see opto/opcodes.hpp for more info +// reg_class name (reg, ...); +// alloc_class name (reg, ...); +register %{ + +// General Registers +// Integer Registers + reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); + reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); + reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); + reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); + reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); + reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); + reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); + reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); + reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); + reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); + reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); + reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); + reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); + reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); + reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); + reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); + reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); + reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); + reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); + reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); + reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); + reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); + reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); + reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); + reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); + reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); + reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); + reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); + reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); + reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); + reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); + reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); + reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); + reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); + reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); + reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); + reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); + reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); + reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); + reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); + reg_def 
S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); + reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); + reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); + reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); + reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); + reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); + reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); + reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); + reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); + reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); + reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); + +// Special Registers + reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); + reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); + reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); + reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); + reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); + reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); + reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); + reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); + reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); + reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); + +// Floating registers. +reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); +reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); +reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); +reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); +reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); +reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); +reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); +reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); +reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); +reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); +reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); +reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); +reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); +reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); +reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); +reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); +reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); +reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); +reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); +reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); +reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); +reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); +reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); +reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); +reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); +reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); +reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); +reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); +reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); +reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); +reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); +reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); +reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); +reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); +reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); +reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); +reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); +reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); +reg_def F19 ( SOC, SOC, 
Op_RegF, 19, F19->as_VMReg()); +reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); +reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); +reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); +reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); +reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); +reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); +reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); +reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); +reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); +reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); +reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); +reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); +reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); +reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); +reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); +reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); +reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); +reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); +reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); +reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()); +reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); +reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); +reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); +reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); +reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); + + +// ---------------------------- +// Special Registers +//S6 is used for get_thread(S6) +//S5 is uesd for heapbase of compressed oop +alloc_class chunk0( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S5, S5_H, + S6, S6_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T8, T8_H, + T9, T9_H, + T1, T1_H, // inline_cache_reg + V1, V1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + V0, V0_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + GP, GP_H + RA, RA_H, + SP, SP_H, // stack_pointer + FP, FP_H // frame_pointer + ); + +alloc_class chunk1( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F23, F23_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F19, F19_H, + F18, F18_H, + F17, F17_H, + F16, F16_H, + F15, F15_H, + F14, F14_H, + F13, F13_H, + F12, F12_H, + F29, F29_H, + F30, F30_H, + F31, F31_H); + +reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); +reg_class s0_reg( S0 ); +reg_class s1_reg( S1 ); +reg_class s2_reg( S2 ); +reg_class s3_reg( S3 ); +reg_class s4_reg( S4 ); +reg_class s5_reg( S5 ); +reg_class s6_reg( S6 ); +reg_class s7_reg( S7 ); + +reg_class t_reg( T0, T1, T2, T3, T8, T9 ); +reg_class t0_reg( T0 ); +reg_class t1_reg( T1 ); +reg_class t2_reg( T2 ); +reg_class t3_reg( T3 ); +reg_class t8_reg( T8 ); +reg_class t9_reg( T9 ); + +reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); +reg_class a0_reg( A0 ); +reg_class a1_reg( A1 ); +reg_class a2_reg( A2 ); +reg_class a3_reg( A3 ); +reg_class a4_reg( A4 ); +reg_class a5_reg( A5 ); +reg_class a6_reg( A6 ); +reg_class a7_reg( A7 ); + +reg_class v0_reg( V0 ); +reg_class v1_reg( V1 ); + +reg_class sp_reg( SP, SP_H ); +reg_class fp_reg( FP, FP_H ); + +reg_class v0_long_reg( V0, V0_H ); +reg_class v1_long_reg( V1, V1_H ); +reg_class a0_long_reg( A0, A0_H ); +reg_class a1_long_reg( A1, A1_H ); +reg_class a2_long_reg( 
A2, A2_H ); +reg_class a3_long_reg( A3, A3_H ); +reg_class a4_long_reg( A4, A4_H ); +reg_class a5_long_reg( A5, A5_H ); +reg_class a6_long_reg( A6, A6_H ); +reg_class a7_long_reg( A7, A7_H ); +reg_class t0_long_reg( T0, T0_H ); +reg_class t1_long_reg( T1, T1_H ); +reg_class t2_long_reg( T2, T2_H ); +reg_class t3_long_reg( T3, T3_H ); +reg_class t8_long_reg( T8, T8_H ); +reg_class t9_long_reg( T9, T9_H ); +reg_class s0_long_reg( S0, S0_H ); +reg_class s1_long_reg( S1, S1_H ); +reg_class s2_long_reg( S2, S2_H ); +reg_class s3_long_reg( S3, S3_H ); +reg_class s4_long_reg( S4, S4_H ); +reg_class s5_long_reg( S5, S5_H ); +reg_class s6_long_reg( S6, S6_H ); +reg_class s7_long_reg( S7, S7_H ); + +reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); + +reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); + +reg_class p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T8, T8_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class no_T8_p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class long_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T8, T8_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + + +// Floating point registers. +// F31 are not used as temporary registers in D2I +reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); +reg_class dbl_reg( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F12, F12_H, + F13, F13_H, + F14, F14_H, + F15, F15_H, + F16, F16_H, + F17, F17_H, + F18, F18_H, + F19, F19_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F23, F23_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F29, F29_H, + F31, F31_H); + +reg_class flt_arg0( F12 ); +reg_class dbl_arg0( F12, F12_H ); +reg_class dbl_arg1( F14, F14_H ); + +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define name --> value mappings to inform the ADLC of an integer valued name +// Current support includes integer values in the range [0, 0x7FFFFFFF] +// Format: +// int_def ( , ); +// Generated Code in ad_.hpp +// #define () +// // value == +// Generated code in ad_.cpp adlc_verification() +// assert( == , "Expect () to equal "); +// +definitions %{ + int_def DEFAULT_COST ( 100, 100); + int_def HUGE_COST (1000000, 1000000); + + // Memory refs are twice as expensive as run-of-the-mill. + int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); + + // Branches are even more expensive. 
+ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); + // we use jr instruction to construct call, so more expensive + int_def CALL_COST ( 500, DEFAULT_COST * 5); +/* + int_def EQUAL ( 1, 1 ); + int_def NOT_EQUAL ( 2, 2 ); + int_def GREATER ( 3, 3 ); + int_def GREATER_EQUAL ( 4, 4 ); + int_def LESS ( 5, 5 ); + int_def LESS_EQUAL ( 6, 6 ); +*/ +%} + + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, functions, and +// definitions necessary in the rest of the architecture description + +source_hpp %{ +// Header information of the source block. +// Method declarations/definitions which are used outside +// the ad-scope can conveniently be defined here. +// +// To keep related declarations/definitions/uses close together, +// we switch between source %{ }% and source_hpp %{ }% freely as needed. + +class CallStubImpl { + + //-------------------------------------------------------------- + //---< Used for optimization in Compile::shorten_branches >--- + //-------------------------------------------------------------- + + public: + // Size of call trampoline stub. + static uint size_call_trampoline() { + return 0; // no call trampolines on this platform + } + + // number of relocations needed by a call trampoline stub + static uint reloc_call_trampoline() { + return 0; // no call trampolines on this platform + } +}; + +class HandlerImpl { + + public: + + static int emit_exception_handler(CodeBuffer &cbuf); + static int emit_deopt_handler(CodeBuffer& cbuf); + + static uint size_exception_handler() { + // NativeCall instruction size is the same as NativeJump. + // exception handler starts out as jump and can be patched to + // a call be deoptimization. (4932387) + // Note that this value is also credited (in output.cpp) to + // the size of the code section. + int size = NativeCall::instruction_size; + return round_to(size, 16); + } + + static uint size_deopt_handler() { + int size = NativeCall::instruction_size; + return round_to(size, 16); + } +}; + +%} // end source_hpp + +source %{ + +#define NO_INDEX 0 +#define RELOC_IMM64 Assembler::imm_operand +#define RELOC_DISP32 Assembler::disp32_operand + + +#define __ _masm. + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + + +// Emit exception handler code. +// Stuff framesize into a register and call a VM stub routine. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_exception_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); + __ align(16); + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + +// Emit deopt handler code. +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. 
+ // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_deopt_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call(SharedRuntime::deopt_blob()->unpack()); + __ align(16); + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + switch (opcode) { + //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + if (!UseCountLeadingZerosInstructionMIPS64) + return false; + break; + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + if (!UseCountTrailingZerosInstructionMIPS64) + return false; + break; + } + + return true; // Per default match rules are supported. +} + +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + int offs = offset - br_size + 4; + // To be conservative on MIPS + // branch node should be end with: + // branch inst + // delay slot + const int safety_zone = 3 * BytesPerInstWord; + return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); +} + + +// No additional cost for CMOVL. +const int Matcher::long_cmove_cost() { return 0; } + +// No CMOVF/CMOVD with SSE2 +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } + +// Does the CPU require late expand (see block.cpp for description of late expand)? +const bool Matcher::require_postalloc_expand = false; + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? True for Intel but false for most RISCs +const bool Matcher::clone_shift_expressions = false; + +// Do we need to mask the count passed to shift instructions or does +// the cpu only look at the lower 5/6 bits anyway? +const bool Matcher::need_masked_shift_count = false; + +bool Matcher::narrow_oop_use_complex_address() { + assert(UseCompressedOops, "only for compressed oops code"); + return false; +} + +bool Matcher::narrow_klass_use_complex_address() { + assert(UseCompressedClassPointers, "only for compressed klass code"); + return false; +} + +// This is UltraSparc specific, true just means we have fast l2f conversion +const bool Matcher::convL2FSupported(void) { + return true; +} + +// Max vector size in bytes. 0 if not supported. +const int Matcher::vector_width_in_bytes(BasicType bt) { + if (MaxVectorSize == 0) + return 0; + assert(MaxVectorSize == 8, ""); + return 8; +} + +// Vector ideal reg +const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 8, ""); + switch(size) { + case 8: return Op_VecD; + } + ShouldNotReachHere(); + return 0; +} + +// Only lowest bits of xmm reg are used for vector shift count. +const uint Matcher::vector_shift_count_ideal_reg(int size) { + fatal("vector shift is not supported"); + return Node::NotAMachineReg; +} + +// Limits on vector size (number of elements) loaded into vector. 
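[Editor's restatement, not part of the patch] The is_short_branch_offset test above as a standalone predicate, to make the reach explicit: a signed 16-bit word offset gives roughly +/-128 KB, reduced by a 3-instruction safety zone for the branch plus its delay slot.

    #include <stdint.h>

    static int fits_simm16(int64_t x) { return x >= -32768 && x <= 32767; }

    static int short_branch_reachable(int offset, int br_size) {
      const int BytesPerInstWord = 4;
      int offs = offset - br_size + 4;
      const int safety_zone = 3 * BytesPerInstWord;            // branch + delay slot margin
      return fits_simm16(((offs < 0) ? offs - safety_zone
                                     : offs + safety_zone) >> 2);  // byte -> word offset
    }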
+const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. +} + +// MIPS supports misaligned vectors store/load? FIXME +const bool Matcher::misaligned_vectors_ok() { + return false; + //return !AlignVector; // can be changed by flag +} + +// Register for DIVI projection of divmodI +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +int Matcher::regnum_to_fpu_offset(int regnum) { + return regnum - 32; // The FP registers are in the second chunk +} + + +const bool Matcher::isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + return true; +} + + +// Return whether or not this register is ever used as an argument. This +// function is used on startup to build the trampoline stubs in generateOptoStub. +// Registers not mentioned will be killed by the VM call in the trampoline, and +// arguments in those registers not be available to the callee. +bool Matcher::can_be_java_arg( int reg ) { + // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() + if ( reg == T0_num || reg == T0_H_num + || reg == A0_num || reg == A0_H_num + || reg == A1_num || reg == A1_H_num + || reg == A2_num || reg == A2_H_num + || reg == A3_num || reg == A3_H_num + || reg == A4_num || reg == A4_H_num + || reg == A5_num || reg == A5_H_num + || reg == A6_num || reg == A6_H_num + || reg == A7_num || reg == A7_H_num ) + return true; + + if ( reg == F12_num || reg == F12_H_num + || reg == F13_num || reg == F13_H_num + || reg == F14_num || reg == F14_H_num + || reg == F15_num || reg == F15_H_num + || reg == F16_num || reg == F16_H_num + || reg == F17_num || reg == F17_H_num + || reg == F18_num || reg == F18_H_num + || reg == F19_num || reg == F19_H_num ) + return true; + + return false; +} + +bool Matcher::is_spillable_arg( int reg ) { + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { + return false; +} + +// Register for MODL projection of divmodL +RegMask Matcher::modL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return FP_REG_mask(); +} + +// MIPS doesn't support AES intrinsics +const bool Matcher::pass_original_key_for_aes() { + return false; +} + +int CallStaticJavaDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallLeafDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallRuntimeDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +// If CPU can load and 
store mis-aligned doubles directly then no fixup is +// needed. Else we split the double into 2 integer pieces and move it +// piece-by-piece. Only happens when passing doubles into C code as the +// Java calling convention forces doubles to be aligned. +const bool Matcher::misaligned_doubles_ok = false; +// Do floats take an entire double register or just half? +//const bool Matcher::float_in_double = true; +bool Matcher::float_in_double() { return false; } +// Threshold size for cleararray. +const int Matcher::init_array_short_size = 8 * BytesPerLong; +// Do ints take an entire long register or just half? +const bool Matcher::int_in_long = true; +// Is it better to copy float constants, or load them directly from memory? +// Intel can load a float constant from a direct address, requiring no +// extra registers. Most RISCs will have to materialize an address into a +// register first, so they would do better to copy the constant from stack. +const bool Matcher::rematerialize_float_constants = false; +// Advertise here if the CPU requires explicit rounding operations +// to implement the UseStrictFP mode. +const bool Matcher::strict_fp_requires_explicit_rounding = false; +// false => size gets scaled to BytesPerLong, ok. +const bool Matcher::init_array_count_is_in_bytes = false; + +// Indicate if the safepoint node needs the polling page as an input. +// Since MIPS doesn't have absolute addressing, it needs. +bool SafePointNode::needs_polling_address_input() { + return false; +} + +// !!!!! Special hack to get all type of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. +int MachCallStaticJavaNode::ret_addr_offset() { + //lui + //ori + //nop + //nop + //jalr + //nop + return 24; +} + +int MachCallDynamicJavaNode::ret_addr_offset() { + //lui IC_Klass, + //ori IC_Klass, + //dsll IC_Klass + //ori IC_Klass + + //lui T9 + //ori T9 + //nop + //nop + //jalr T9 + //nop + return 4 * 4 + 4 * 6; +} + +//============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float, rc_stack +enum RC { rc_bad, rc_int, rc_float, rc_stack }; +static enum RC rc_class( OptoReg::Name reg ) { + if( !OptoReg::is_valid(reg) ) return rc_bad; + if (OptoReg::is_stack(reg)) return rc_stack; + VMReg r = OptoReg::as_VMReg(reg); + if (r->is_Register()) return rc_int; + assert(r->is_FloatRegister(), "must be"); + return rc_float; +} + +uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { + // Get registers to move + OptoReg::Name src_second = ra_->get_reg_second(in(1)); + OptoReg::Name src_first = ra_->get_reg_first(in(1)); + OptoReg::Name dst_second = ra_->get_reg_second(this ); + OptoReg::Name dst_first = ra_->get_reg_first(this ); + + enum RC src_second_rc = rc_class(src_second); + enum RC src_first_rc = rc_class(src_first); + enum RC dst_second_rc = rc_class(dst_second); + enum RC dst_first_rc = rc_class(dst_first); + + assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); + + // Generate spill code! 
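[Editor's cross-check, illustrative only] The two ret_addr_offset values above follow from the instruction counts listed in their comments: the static call sequence is six 4-byte instructions, and the dynamic call prepends four more to materialize IC_Klass.

    static const int kInsnBytes = 4;
    // lui, ori, nop, nop, jalr, nop                       -> 6 * 4 = 24
    static_assert(6 * kInsnBytes == 24, "MachCallStaticJavaNode::ret_addr_offset");
    // lui, ori, dsll, ori (IC_Klass) + the 6 above        -> 4*4 + 6*4 = 40
    static_assert((4 + 6) * kInsnBytes == 4 * 4 + 4 * 6, "MachCallDynamicJavaNode::ret_addr_offset");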
+ int size = 0; + + if( src_first == dst_first && src_second == dst_second ) + return 0; // Self copy, no move + + if (src_first_rc == rc_stack) { + // mem -> + if (dst_first_rc == rc_stack) { + // mem -> mem + assert(src_second != dst_first, "overlap"); + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld(AT, Address(SP, src_offset)); + __ sd(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" + "sd AT, [SP + #%d]", + src_offset, dst_offset); + } +#endif + } + size += 8; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + // No pushl/popl, so: + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ lw(AT, Address(SP, src_offset)); + __ sw(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("lw AT, [SP + #%d] spill 2\n\t" + "sw AT, [SP + #%d]\n\t", + src_offset, dst_offset); + } +#endif + } + size += 8; + } + return size; + } else if (dst_first_rc == rc_int) { + // mem -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("ld %s, [SP + #%d]\t# spill 3", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); + else + __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + if (this->ideal_reg() == Op_RegI) + st->print("lw %s, [SP + #%d]\t# spill 4", + Matcher::regName[dst_first], + offset); + else + st->print("lwu %s, [SP + #%d]\t# spill 5", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_float) { + // mem-> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("ldc1 %s, [SP + #%d]\t# spill 6", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no 
transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ lwc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("lwc1 %s, [SP + #%d]\t# spill 7", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } + return size; + } + } else if (src_first_rc == rc_int) { + // gpr -> + if (dst_first_rc == rc_stack) { + // gpr -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("sd %s, [SP + #%d] # spill 8", + Matcher::regName[src_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("sw %s, [SP + #%d]\t# spill 9", + Matcher::regName[src_first], offset); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_int) { + // gpr -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ move(as_Register(Matcher::_regEncode[dst_first]), + as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("move(64bit) %s <-- %s\t# spill 10", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + return size; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); + else + __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); +#ifndef PRODUCT + } else { + if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("move(32-bit) %s <-- %s\t# spill 11", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + return size; + } + } else if (dst_first_rc == rc_float) { + // gpr -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("dmtc1 %s, %s\t# spill 12", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 
== dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("mtc1 %s, %s\t# spill 13", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } + return size; + } + } else if (src_first_rc == rc_float) { + // xmm -> + if (dst_first_rc == rc_stack) { + // xmm -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("sdc1 %s, [SP + #%d]\t# spill 14", + Matcher::regName[src_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("swc1 %s, [SP + #%d]\t# spill 15", + Matcher::regName[src_first], + offset); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_int) { + // xmm -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("dmfc1 %s, %s\t# spill 16", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("mfc1 %s, %s\t# spill 17", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_float) { + // xmm -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("mov_d %s <-- %s\t# spill 18", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mov_s( 
as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("mov_s %s <-- %s\t# spill 19", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } + return size; + } + } + + assert(0," foo "); + Unimplemented(); + return size; + +} + +#ifndef PRODUCT +void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + implementation( NULL, ra_, false, st ); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation( &cbuf, ra_, false, NULL ); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= +# + +#ifndef PRODUCT +void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("BRK"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { + MacroAssembler _masm(&cbuf); + __ brk(5); +} + +uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { + return MachNode::size(ra_); +} + + +//============================================================================= +#ifndef PRODUCT +void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile *C = ra_->C; + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); + st->print("\t"); + if (UseLEXT1) { + st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); + } else { + st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); + st->print("\t"); + st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); + } + + if( do_polling() && C->is_method_compilation() ) { + st->print("\t"); + st->print_cr("Poll Safepoint # MachEpilogNode"); + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile *C = ra_->C; + MacroAssembler _masm(&cbuf); + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + if (UseLEXT1) { + __ gslq(RA, FP, SP, framesize - wordSize * 2); + } else { + __ ld(RA, SP, framesize - wordSize ); + __ ld(FP, SP, framesize - wordSize * 2); + } + __ daddiu(SP, SP, framesize); + + if( do_polling() && C->is_method_compilation() ) { + __ set64(AT, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_return_type); + __ lw(AT, AT, 0); + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug +} + +int MachEpilogNode::reloc() const { + return 0; // a large enough number +} + +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +int MachEpilogNode::safepoint_offset() const { return 0; } + +//============================================================================= + +#ifndef PRODUCT +void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); +} +#endif + + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + return 4; +} + +void 
BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + + __ addiu(as_Register(reg), SP, offset); +} + + +//static int sizeof_FFree_Float_Stack_All = -1; + +int MachCallRuntimeNode::ret_addr_offset() { + //lui + //ori + //dsll + //ori + //jalr + //nop + assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); + return NativeCall::instruction_size; +} + + +//============================================================================= +#ifndef PRODUCT +void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { + MacroAssembler _masm(&cbuf); + int i = 0; + for(i = 0; i < _count; i++) + __ nop(); +} + +uint MachNopNode::size(PhaseRegAlloc *) const { + return 4 * _count; +} +const Pipeline* MachNopNode::pipeline() const { + return MachNode::pipeline_class(); +} + +//============================================================================= + +//============================================================================= +#ifndef PRODUCT +void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + st->print_cr("load_klass(T9, T0)"); + st->print_cr("\tbeq(T9, iCache, L)"); + st->print_cr("\tnop"); + st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); + st->print_cr("\tnop"); + st->print_cr("\tnop"); + st->print_cr(" L:"); +} +#endif + + +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int ic_reg = Matcher::inline_cache_reg_encode(); + Label L; + Register receiver = T0; + Register iCache = as_Register(ic_reg); + + __ load_klass(T9, receiver); + __ beq(T9, iCache, L); + __ delayed()->nop(); + __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ bind(L); +} + +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + + + +//============================================================================= + +const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); + +int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + Compile* C = ra_->C; + Compile::ConstantTable& constant_table = C->constant_table(); + MacroAssembler _masm(&cbuf); + + Register Rtoc = as_Register(ra_->get_encode(this)); + CodeSection* consts_section = __ code()->consts(); + int consts_size = consts_section->align_at_start(consts_section->size()); + assert(constant_table.size() == consts_size, "must be equal"); + + if (consts_section->size()) { + // Materialize the constant table base. 
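+      // A rough sketch of what patchable_set48 is expected to emit here (the
+      // exact expansion is owned by MacroAssembler::patchable_set48 and is
+      // shown only for illustration):
+      //   lui   Rtoc, baseaddr[47:32]
+      //   ori   Rtoc, Rtoc, baseaddr[31:16]
+      //   dsll  Rtoc, Rtoc, 16
+      //   ori   Rtoc, Rtoc, baseaddr[15:0]
+      // The sequence has a fixed length of four instructions so the
+      // internal_word relocation recorded above can patch it in place later,
+      // which is also why size() below reports 4 * 4 bytes.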
+ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); + // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); + __ relocate(relocInfo::internal_word_type); + __ patchable_set48(Rtoc, (long)baseaddr); + } +} + +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { + // patchable_set48 (4 insts) + return 4 * 4; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + Register r = as_Register(ra_->get_encode(this)); + st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); +} +#endif + + +//============================================================================= +#ifndef PRODUCT +void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile* C = ra_->C; + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(bangsize)) { + st->print_cr("# stack bang"); st->print("\t"); + } + if (UseLEXT1) { + st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); + } else { + st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); + st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); + } + st->print("daddiu FP, SP, -%d \n\t", wordSize*2); + st->print("daddiu SP, SP, -%d \t",framesize); +} +#endif + + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + + if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + + __ daddiu(SP, SP, -framesize); + if (UseLEXT1) { + __ gssq(RA, FP, SP, framesize - wordSize * 2); + } else { + __ sd(RA, SP, framesize - wordSize); + __ sd(FP, SP, framesize - wordSize * 2); + } + __ daddiu(FP, SP, framesize - wordSize * 2); + + C->set_frame_complete(cbuf.insts_size()); + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. + Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + + +uint MachPrologNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachPrologNode::reloc() const { + return 0; // a large enough number +} + +%} + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to output +// byte streams. Encoding classes generate functions which are called by +// Machine Instruction Nodes in order to generate the bit encoding of the +// instruction. Operands specify their base encoding interface with the +// interface keyword. 
There are currently supported four interfaces, +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an +// operand to generate a function which returns its register number when +// queried. CONST_INTER causes an operand to generate a function which +// returns the value of the constant when queried. MEMORY_INTER causes an +// operand to generate four functions which return the Base Register, the +// Index Register, the Scale Value, and the Offset Value of the operand when +// queried. COND_INTER causes an operand to generate six functions which +// return the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional instruction. +// Instructions specify two basic values for encoding. They use the +// ins_encode keyword to specify their encoding class (which must be one of +// the class names specified in the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular instruction +// needs for encoding need to be specified. +encode %{ + + //Load byte signed + enc_class load_B_enc (mRegI dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if( Assembler::is_simm16(disp) ) { + if (UseLEXT1) { + if (scale == 0) { + __ gslbx(as_Register(dst), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gslbx(as_Register(dst), as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ lb(as_Register(dst), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gslbx(as_Register(dst), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ lb(as_Register(dst), AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lb(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gslbx(as_Register(dst), as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ lb(as_Register(dst), AT, 0); + } + } + } + %} + + //Load byte unsigned + enc_class load_UB_enc (mRegI dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ lbu(as_Register(dst), AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ lbu(as_Register(dst), AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lbu(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ lbu(as_Register(dst), AT, 0); + } + } + %} + + enc_class store_B_reg_enc (memory mem, mRegI src) %{ + MacroAssembler _masm(&cbuf); + int src = $src$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = 
$mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + if( Assembler::is_simm(disp, 8) ) { + if (UseLEXT1) { + __ gssbx(as_Register(src), as_Register(base), as_Register(index), disp); + } else { + __ addu(AT, as_Register(base), as_Register(index)); + __ sb(as_Register(src), AT, disp); + } + } else if( Assembler::is_simm16(disp) ) { + __ addu(AT, as_Register(base), as_Register(index)); + __ sb(as_Register(src), AT, disp); + } else { + __ addu(AT, as_Register(base), as_Register(index)); + __ move(T9, disp); + if (UseLEXT1) { + __ gssbx(as_Register(src), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sb(as_Register(src), AT, 0); + } + } + } else { + __ dsll(AT, as_Register(index), scale); + if( Assembler::is_simm(disp, 8) ) { + if (UseLEXT1) { + __ gssbx(as_Register(src), AT, as_Register(base), disp); + } else { + __ addu(AT, as_Register(base), AT); + __ sb(as_Register(src), AT, disp); + } + } else if( Assembler::is_simm16(disp) ) { + __ addu(AT, as_Register(base), AT); + __ sb(as_Register(src), AT, disp); + } else { + __ addu(AT, as_Register(base), AT); + __ move(T9, disp); + if (UseLEXT1) { + __ gssbx(as_Register(src), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sb(as_Register(src), AT, 0); + } + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sb(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gssbx(as_Register(src), as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ sb(as_Register(src), AT, 0); + } + } + } + %} + + enc_class store_B_immI_enc (memory mem, immI8 src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + int value = $src$$constant; + + if( index != 0 ) { + if (!UseLEXT1) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sb(R0, AT, 0); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ move(T9, value); + __ sb(T9, AT, 0); + } + } + } else { + + if (scale == 0) { + if( Assembler::is_simm(disp, 8) ) { + if (value == 0) { + __ gssbx(R0, as_Register(base), as_Register(index), disp); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), as_Register(index), disp); + } + } else if( Assembler::is_simm16(disp) ) { + __ daddu(AT, as_Register(base), as_Register(index)); + if (value == 0) { + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } else { + if (value == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + __ move(T9, disp); + __ gssbx(R0, AT, T9, 0); + } else { + __ move(AT, disp); + __ move(T9, value); + __ daddu(AT, as_Register(base), AT); + __ gssbx(T9, AT, as_Register(index), 0); + } + } + + } else { + + if( Assembler::is_simm(disp, 8) ) { + __ dsll(AT, as_Register(index), scale); + if (value == 0) { + __ gssbx(R0, as_Register(base), AT, disp); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, disp); + } + } else if( Assembler::is_simm16(disp) ) { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + if (value == 0) { + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } else { + __ dsll(AT, 
as_Register(index), scale); + if (value == 0) { + __ daddu(AT, as_Register(base), AT); + __ move(T9, disp); + __ gssbx(R0, AT, T9, 0); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, 0); + } + } + } + } + } else { + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sb(R0, as_Register(base), disp); + } else { + __ move(AT, value); + __ sb(AT, as_Register(base), disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + if (UseLEXT1) { + __ gssbx(R0, as_Register(base), T9, 0); + } else { + __ daddu(AT, as_Register(base), T9); + __ sb(R0, AT, 0); + } + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ move(AT, value); + __ gssbx(AT, as_Register(base), T9, 0); + } else { + __ daddu(AT, as_Register(base), T9); + __ move(T9, value); + __ sb(T9, AT, 0); + } + } + } + } + %} + + + enc_class store_B_immI_enc_sync (memory mem, immI8 src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + int value = $src$$constant; + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp,8) ) { + if ( scale == 0 ) { + if ( value == 0 ) { + __ gssbx(R0, as_Register(base), as_Register(index), disp); + } else { + __ move(AT, value); + __ gssbx(AT, as_Register(base), as_Register(index), disp); + } + } else { + __ dsll(AT, as_Register(index), scale); + if ( value == 0 ) { + __ gssbx(R0, as_Register(base), AT, disp); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, disp); + } + } + } else if ( Assembler::is_simm16(disp) ) { + if ( scale == 0 ) { + __ daddu(AT, as_Register(base), as_Register(index)); + if ( value == 0 ){ + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + if ( value == 0 ) { + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } + } else { + if ( scale == 0 ) { + __ move(AT, disp); + __ daddu(AT, as_Register(index), AT); + if ( value == 0 ) { + __ gssbx(R0, as_Register(base), AT, 0); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, 0); + } + } else { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + if ( value == 0 ) { + __ gssbx(R0, as_Register(base), AT, 0); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, 0); + } + } + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sb(R0, AT, 0); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ move(T9, value); + __ sb(T9, AT, 0); + } + } + } + } else { + if (UseLEXT1){ + if ( Assembler::is_simm16(disp) ){ + if ( value == 0 ) { + __ sb(R0, as_Register(base), disp); + } else { + __ move(AT, value); + __ sb(AT, as_Register(base), disp); + } + } else { + __ move(AT, disp); + if ( value == 0 ) { + __ gssbx(R0, as_Register(base), AT, 0); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sb(R0, as_Register(base), disp); + } else { + __ 
move(AT, value); + __ sb(AT, as_Register(base), disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sb(R0, AT, 0); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ move(T9, value); + __ sb(T9, AT, 0); + } + } + } + } + + __ sync(); + %} + + // Load Short (16bit signed) + enc_class load_S_enc (mRegI dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gslhx(as_Register(dst), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gslhx(as_Register(dst), as_Register(base), AT, disp); + } + } else if ( Assembler::is_simm16(disp) ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + __ lh(as_Register(dst), AT, disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + __ lh(as_Register(dst), AT, disp); + } + } else { + if (scale == 0) { + __ move(AT, disp); + __ daddu(AT, as_Register(index), AT); + __ gslhx(as_Register(dst), as_Register(base), AT, 0); + } else { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ gslhx(as_Register(dst), as_Register(base), AT, 0); + } + } + } else { // not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ lh(as_Register(dst), AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ lh(as_Register(dst), AT, 0); + } + } + } else { // index is 0 + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + __ lh(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + __ gslhx(as_Register(dst), as_Register(base), T9, 0); + } + } else { //not use loongson isa + if( Assembler::is_simm16(disp) ) { + __ lh(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ lh(as_Register(dst), AT, 0); + } + } + } + %} + + // Load Char (16bit unsigned) + enc_class load_C_enc (mRegI dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ lhu(as_Register(dst), AT, disp); + } else { + __ move(T9, disp); + __ addu(AT, AT, T9); + __ lhu(as_Register(dst), AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lhu(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ lhu(as_Register(dst), AT, 0); + } + } + %} + + // Store Char (16bit unsigned) + enc_class store_C_reg_enc (memory mem, mRegI src) %{ + MacroAssembler _masm(&cbuf); + int src = $src$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gsshx(as_Register(src), as_Register(base), 
as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsshx(as_Register(src), as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ sh(as_Register(src), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gsshx(as_Register(src), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sh(as_Register(src), AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sh(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gsshx(as_Register(src), as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ sh(as_Register(src), AT, 0); + } + } + } + %} + + enc_class store_C0_enc (memory mem) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gsshx(R0, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsshx(R0, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ sh(R0, AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gsshx(R0, AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sh(R0, AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sh(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gsshx(R0, as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ sh(R0, AT, 0); + } + } + } + %} + + enc_class load_I_enc (mRegI dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gslwx(as_Register(dst), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gslwx(as_Register(dst), as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ lw(as_Register(dst), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gslwx(as_Register(dst), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ lw(as_Register(dst), AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lw(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gslwx(as_Register(dst), as_Register(base), T9, 0); + 
} else { + __ addu(AT, as_Register(base), T9); + __ lw(as_Register(dst), AT, 0); + } + } + } + %} + + enc_class store_I_reg_enc (memory mem, mRegI src) %{ + MacroAssembler _masm(&cbuf); + int src = $src$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsswx(as_Register(src), as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ sw(as_Register(src), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gsswx(as_Register(src), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sw(as_Register(src), AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sw(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gsswx(as_Register(src), as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ sw(as_Register(src), AT, 0); + } + } + } + %} + + enc_class store_I_immI_enc (memory mem, immI src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + int value = $src$$constant; + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if ( scale == 0 ) { + if ( value == 0 ) { + __ gsswx(R0, as_Register(base), as_Register(index), disp); + } else { + __ move(T9, value); + __ gsswx(T9, as_Register(base), as_Register(index), disp); + } + } else { + __ dsll(AT, as_Register(index), scale); + if ( value == 0 ) { + __ gsswx(R0, as_Register(base), AT, disp); + } else { + __ move(T9, value); + __ gsswx(T9, as_Register(base), AT, disp); + } + } + } else if ( Assembler::is_simm16(disp) ) { + if ( scale == 0 ) { + __ daddu(AT, as_Register(base), as_Register(index)); + if ( value == 0 ) { + __ sw(R0, AT, disp); + } else { + __ move(T9, value); + __ sw(T9, AT, disp); + } + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + if ( value == 0 ) { + __ sw(R0, AT, disp); + } else { + __ move(T9, value); + __ sw(T9, AT, disp); + } + } + } else { + if ( scale == 0 ) { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + if ( value ==0 ) { + __ gsswx(R0, as_Register(base), AT, 0); + } else { + __ move(T9, value); + __ gsswx(T9, as_Register(base), AT, 0); + } + } else { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + if ( value == 0 ) { + __ gsswx(R0, as_Register(base), AT, 0); + } else { + __ move(T9, value); + __ gsswx(T9, as_Register(base), AT, 0); + } + } + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sw(R0, AT, disp); + } else { + __ move(T9, value); + __ sw(T9, AT, disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sw(R0, 
AT, 0); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ move(T9, value); + __ sw(T9, AT, 0); + } + } + } + } else { + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + if ( value == 0 ) { + __ sw(R0, as_Register(base), disp); + } else { + __ move(AT, value); + __ sw(AT, as_Register(base), disp); + } + } else { + __ move(T9, disp); + if ( value == 0 ) { + __ gsswx(R0, as_Register(base), T9, 0); + } else { + __ move(AT, value); + __ gsswx(AT, as_Register(base), T9, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sw(R0, as_Register(base), disp); + } else { + __ move(AT, value); + __ sw(AT, as_Register(base), disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sw(R0, AT, 0); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ move(T9, value); + __ sw(T9, AT, 0); + } + } + } + } + %} + + enc_class load_N_enc (mRegN dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ lwu(as_Register(dst), AT, disp); + } else { + __ set64(T9, disp); + __ daddu(AT, AT, T9); + __ lwu(as_Register(dst), AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lwu(as_Register(dst), as_Register(base), disp); + } else { + __ set64(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ lwu(as_Register(dst), AT, 0); + } + } + %} + + + enc_class load_P_enc (mRegP dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ gsldx(as_Register(dst), as_Register(base), AT, disp); + } else { + __ gsldx(as_Register(dst), as_Register(base), as_Register(index), disp); + } + } else if ( Assembler::is_simm16(disp) ){ + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, AT, as_Register(base)); + } else { + __ daddu(AT, as_Register(index), as_Register(base)); + } + __ ld(as_Register(dst), AT, disp); + } else { + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + } + __ gsldx(as_Register(dst), as_Register(base), AT, 0); + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ ld(as_Register(dst), AT, disp); + } else { + __ set64(T9, disp); + __ daddu(AT, AT, T9); + __ ld(as_Register(dst), AT, 0); + } + } + } else { + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ){ + __ ld(as_Register(dst), as_Register(base), disp); + } else { + __ set64(T9, disp); + __ gsldx(as_Register(dst), as_Register(base), T9, 0); 
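+        // Note: disp did not fit ld's signed 16-bit immediate, so it was
+        // materialized into T9 above and the Loongson-extension gsldx folds
+        // the base + T9 addition into the load.  Illustrative sequence for a
+        // large disp such as 0x12345678 (the exact set64 expansion may vary
+        // with the constant):
+        //   lui/ori/dsll/ori  T9, 0x12345678    # set64(T9, disp)
+        //   gsldx             dst, base, T9, 0  # dst = *(base + T9)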
+ } + } else { //not use loongson isa + if( Assembler::is_simm16(disp) ) { + __ ld(as_Register(dst), as_Register(base), disp); + } else { + __ set64(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ ld(as_Register(dst), AT, 0); + } + } + } + %} + + // Load acquire. + // load_P_enc + sync + enc_class load_P_enc_ac (mRegP dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ gsldx(as_Register(dst), as_Register(base), AT, disp); + } else { + __ gsldx(as_Register(dst), as_Register(base), as_Register(index), disp); + } + } else if ( Assembler::is_simm16(disp) ){ + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, AT, as_Register(base)); + } else { + __ daddu(AT, as_Register(index), as_Register(base)); + } + __ ld(as_Register(dst), AT, disp); + } else { + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + } + __ gsldx(as_Register(dst), as_Register(base), AT, 0); + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ ld(as_Register(dst), AT, disp); + } else { + __ set64(T9, disp); + __ daddu(AT, AT, T9); + __ ld(as_Register(dst), AT, 0); + } + } + } else { + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ){ + __ ld(as_Register(dst), as_Register(base), disp); + } else { + __ set64(T9, disp); + __ gsldx(as_Register(dst), as_Register(base), T9, 0); + } + } else { //not use loongson isa + if( Assembler::is_simm16(disp) ) { + __ ld(as_Register(dst), as_Register(base), disp); + } else { + __ set64(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ ld(as_Register(dst), AT, 0); + } + } + } + __ sync(); + %} + + enc_class store_P_reg_enc (memory mem, mRegP src) %{ + MacroAssembler _masm(&cbuf); + int src = $src$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (UseLEXT1){ + if ( Assembler::is_simm(disp, 8) ) { + if ( scale == 0 ) { + __ gssdx(as_Register(src), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gssdx(as_Register(src), as_Register(base), AT, disp); + } + } else if ( Assembler::is_simm16(disp) ) { + if ( scale == 0 ) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ sd(as_Register(src), AT, disp); + } else { + if ( scale == 0 ) { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + } else { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + } + __ gssdx(as_Register(src), as_Register(base), AT, 0); + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ 
sd(as_Register(src), AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sd(as_Register(src), AT, 0); + } + } + } else { + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + __ sd(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + __ gssdx(as_Register(src), as_Register(base), T9, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sd(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sd(as_Register(src), AT, 0); + } + } + } + %} + + enc_class store_N_reg_enc (memory mem, mRegN src) %{ + MacroAssembler _masm(&cbuf); + int src = $src$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (UseLEXT1){ + if ( Assembler::is_simm(disp, 8) ) { + if ( scale == 0 ) { + __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsswx(as_Register(src), as_Register(base), AT, disp); + } + } else if ( Assembler::is_simm16(disp) ) { + if ( scale == 0 ) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ sw(as_Register(src), AT, disp); + } else { + if ( scale == 0 ) { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + } else { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + } + __ gsswx(as_Register(src), as_Register(base), AT, 0); + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ sw(as_Register(src), AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sw(as_Register(src), AT, 0); + } + } + } else { + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + __ sw(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + __ gsswx(as_Register(src), as_Register(base), T9, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sw(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sw(as_Register(src), AT, 0); + } + } + } + %} + + enc_class store_P_immP0_enc (memory mem) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + if ( Assembler::is_simm16(disp) ) { + if (UseLEXT1 && Assembler::is_simm(disp, 8)) { + __ gssdx(R0, as_Register(base), as_Register(index), disp); + } else { + __ daddu(AT, as_Register(base), as_Register(index)); + __ sd(R0, AT, disp); + } + } else { + __ daddu(AT, as_Register(base), as_Register(index)); + __ move(T9, disp); + if (UseLEXT1) { + __ gssdx(R0, AT, T9, 0); + } else { + __ daddu(AT, AT, T9); + __ sd(R0, AT, 0); + } + } + } else { + __ dsll(AT, as_Register(index), scale); + if( Assembler::is_simm16(disp) ) { + if (UseLEXT1 && Assembler::is_simm(disp, 8)) { + __ gssdx(R0, as_Register(base), AT, disp); + } else { + __ daddu(AT, as_Register(base), AT); + __ sd(R0, AT, disp); + } + } else { + __ daddu(AT, as_Register(base), AT); + __ move(T9, disp); + if (UseLEXT1) { + __ gssdx(R0, AT, T9, 0); + } else { + __ daddu(AT, AT, T9); + __ sd(R0, AT, 0); + } + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ 
sd(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gssdx(R0, as_Register(base), T9, 0); + } else { + __ daddu(AT, as_Register(base), T9); + __ sd(R0, AT, 0); + } + } + } + %} + + enc_class storeImmN0_enc(memory mem, ImmN0 src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if(index!=0){ + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + + if( Assembler::is_simm16(disp) ) { + __ sw(R0, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sw(R0, AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sw(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sw(R0, AT, 0); + } + } + %} + + enc_class load_L_enc (mRegL dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + Register dst_reg = as_Register($dst$$reg); + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ ld(dst_reg, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ ld(dst_reg, AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ ld(dst_reg, as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ ld(dst_reg, AT, 0); + } + } + %} + + enc_class store_L_reg_enc (memory mem, mRegL src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + Register src_reg = as_Register($src$$reg); + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ sd(src_reg, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sd(src_reg, AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sd(src_reg, as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sd(src_reg, AT, 0); + } + } + %} + + enc_class store_L_immL_0_enc (memory mem, immL_0 src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ sd(R0, AT, disp); + } else { + __ move(T9, disp); + __ addu(AT, AT, T9); + __ sd(R0, AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sd(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + __ addu(AT, as_Register(base), T9); + __ sd(R0, AT, 0); + } + } + %} + + enc_class store_L_immL_enc (memory mem, immL src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + long imm = $src$$constant; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, 
as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ set64(T9, imm); + __ sd(T9, AT, disp); + } else { + __ move(T9, disp); + __ addu(AT, AT, T9); + __ set64(T9, imm); + __ sd(T9, AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ move(AT, as_Register(base)); + __ set64(T9, imm); + __ sd(T9, AT, disp); + } else { + __ move(T9, disp); + __ addu(AT, as_Register(base), T9); + __ set64(T9, imm); + __ sd(T9, AT, 0); + } + } + %} + + enc_class load_F_enc (regF dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + FloatRegister dst = $dst$$FloatRegister; + + if( index != 0 ) { + if( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gslwxc1(dst, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gslwxc1(dst, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ lwc1(dst, AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gslwxc1(dst, AT, T9, 0); + } else { + __ daddu(AT, AT, T9); + __ lwc1(dst, AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lwc1(dst, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gslwxc1(dst, as_Register(base), T9, 0); + } else { + __ daddu(AT, as_Register(base), T9); + __ lwc1(dst, AT, 0); + } + } + } + %} + + enc_class store_F_reg_enc (memory mem, regF src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + FloatRegister src = $src$$FloatRegister; + + if( index != 0 ) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gsswxc1(src, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsswxc1(src, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ swc1(src, AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gsswxc1(src, AT, T9, 0); + } else { + __ daddu(AT, AT, T9); + __ swc1(src, AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ swc1(src, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gsswxc1(src, as_Register(base), T9, 0); + } else { + __ daddu(AT, as_Register(base), T9); + __ swc1(src, AT, 0); + } + } + } + %} + + enc_class load_D_enc (regD dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + + if ( index != 0 ) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ 
gsldxc1(dst_reg, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsldxc1(dst_reg, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ ldc1(dst_reg, AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gsldxc1(dst_reg, AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ ldc1(dst_reg, AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ ldc1(dst_reg, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gsldxc1(dst_reg, as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ ldc1(dst_reg, AT, 0); + } + } + } + %} + + enc_class store_D_reg_enc (memory mem, regD src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + FloatRegister src_reg = as_FloatRegister($src$$reg); + + if ( index != 0 ) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gssdxc1(src_reg, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gssdxc1(src_reg, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ sdc1(src_reg, AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gssdxc1(src_reg, AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sdc1(src_reg, AT, 0); + } + } + } else { + if ( Assembler::is_simm16(disp) ) { + __ sdc1(src_reg, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gssdxc1(src_reg, as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ sdc1(src_reg, AT, 0); + } + } + } + %} + + enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf + MacroAssembler _masm(&cbuf); + // This is the instruction starting address for relocation info. + __ block_comment("Java_To_Runtime"); + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call((address)$meth$$method); + %} + + enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine + // who we intended to call. + MacroAssembler _masm(&cbuf); + address addr = (address)$meth$$method; + address call; + __ block_comment("Java_Static_Call"); + + if ( !_method ) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 
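+    // The relocation type chosen below tells the rest of the VM how this call
+    // site may later be patched: runtime_call_type when there is no _method
+    // (runtime stubs), opt_virtual_call_type for statically bound virtual
+    // calls, and static_call_type otherwise.  trampoline_call returns NULL
+    // when the code cache is full, in which case the compile bails out via
+    // record_failure; the same applies to the to-interpreter stub emitted for
+    // real Java targets further down.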
+ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); + } else if(_optimized_virtual) { + call = __ trampoline_call(AddressLiteral(addr, relocInfo::opt_virtual_call_type), &cbuf); + } else { + call = __ trampoline_call(AddressLiteral(addr, relocInfo::static_call_type), &cbuf); + } + + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + + if( _method ) { // Emit stub for static call + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + + // + // [Ref: LIR_Assembler::ic_call() ] + // + enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL + MacroAssembler _masm(&cbuf); + __ block_comment("Java_Dynamic_Call"); + __ ic_call((address)$meth$$method); + %} + + + enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ + Register result = $result$$Register; + Register sub = $sub$$Register; + Register super = $super$$Register; + Register length = $tmp$$Register; + Register tmp = T9; + Label miss; + + // result may be the same as sub + // 47c B40: # B21 B41 <- B20 Freq: 0.155379 + // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 + // 4bc mov S2, NULL #@loadConP + // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 + // + MacroAssembler _masm(&cbuf); + Label done; + __ check_klass_subtype_slow_path(sub, super, length, tmp, + NULL, &miss, + /*set_cond_codes:*/ true); + // Refer to X86_64's RDI + __ move(result, 0); + __ b(done); + __ delayed()->nop(); + + __ bind(miss); + __ move(result, 1); + __ bind(done); + %} + +%} + + +//---------MIPS FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. +// +// S T A C K L A Y O U T Allocators stack-slot number +// | (to get allocators register number +// G Owned by | | v add SharedInfo::stack0) +// r CALLER | | +// o | +--------+ pad to even-align allocators stack-slot +// w V | pad0 | numbers; owned by CALLER +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | old | | 3 +// | | SP-+--------+----> Matcher::_old_SP, even aligned +// v | | ret | 3 return address +// Owned by +--------+ +// Self | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> SharedInfo::stack0, even aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by new | | +// Callee SP-+--------+----> Matcher::_new_SP, even aligned +// | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be nessecary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. Holes in the outgoing arguments may be nessecary for +// varargs C calling conventions. 
+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. +// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack +// alignment. Region 11, pad1, may be dynamically extended so that +// SP meets the minimum alignment. + + +frame %{ + + stack_direction(TOWARDS_LOW); + + // These two registers define part of the calling convention + // between compiled code and the interpreter. + // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention + // for more information. + + inline_cache_reg(T1); // Inline Cache Register + interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter + + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset32); + + // Number of stack slots consumed by locking an object + // generate Compile::sync_stack_slots + sync_stack_slots(2); + + frame_pointer(SP); + + // Interpreter stores its frame pointer in a register which is + // stored to the stack by I2CAdaptors. + // I2CAdaptors convert from interpreted java to compiled java. + + interpreter_frame_pointer(FP); + + // generate Matcher::stack_alignment + stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); + + // Number of stack slots between incoming argument block and the start of + // a new frame. The PROLOG must add this many slots to the stack. The + // EPILOG must remove this many slots. + in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(0); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. + // Otherwise, it is above the locks and verification slot and alignment word + //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); + return_addr(REG RA); + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) + // StartNode::calling_convention call this. + calling_convention %{ + SharedRuntime::java_calling_convention(sig_bt, regs, length, false); + %} + + + + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. 
Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // SEE CallRuntimeNode::calling_convention for more information. + c_calling_convention %{ + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); + %} + + + // Location of C & interpreter return values + // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. + // SEE Matcher::match. + c_return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + + // Location of return values + // register(s) contain(s) return value for Op_StartC2I and Op_Start. + // SEE Matcher::match. + + return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(0); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(100); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_pc_relative(0); // Required PC Relative flag +ins_attrib ins_short_branch(0); // Required flag: is this instruction a + // non-matching short branch variant of some + // long branch? +ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) + // specifies the alignment that some part of the instruction (not + // necessarily the start) requires. If > 1, a compute_padding() + // function must be provided for the instruction + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +// Vectors +operand vecD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(VecD); + + format %{ %} + interface(REG_INTER); +%} + +// Flags register, used as output of compare instructions +operand FlagsReg() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegFlags); + + format %{ "T0" %} + interface(REG_INTER); +%} + +//----------Simple Operands---------------------------------------------------- +// TODO: Should we need to define some more special immediate number ? 
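The return_value and c_return_value blocks above pick the return location purely by indexing two small tables with the ideal register class of the return type: the low half is always V0 for integer/pointer/long values and F0 for float/double, and the hi entry only supplies the paired slot (V0_H/F0_H) for the 64-bit classes RegP, RegD and RegL. A minimal standalone C++ sketch of that lookup follows; the enum values and register numbers are illustrative stand-ins, not the real ADLC-generated constants.

  #include <cstdio>
  #include <utility>

  // Illustrative stand-ins for the ideal register classes and for the
  // allocator numbers of V0/V0_H and F0/F0_H; the real values come from
  // the ADLC-generated code, not from this sketch.
  enum IdealReg { Op_RegN = 2, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL };
  enum { Bad = -1, V0_num = 4, V0_H_num = 5, F0_num = 64, F0_H_num = 65 };

  // Mirrors the lo[]/hi[] tables in the return_value block: the hi register
  // is only meaningful for the 64-bit classes RegP, RegD and RegL.
  static std::pair<int, int> return_value_pair(int ideal_reg) {
    static const int lo[Op_RegL + 1] = { 0, 0, V0_num, V0_num, V0_num,   F0_num, F0_num,   V0_num };
    static const int hi[Op_RegL + 1] = { 0, 0, Bad,    Bad,    V0_H_num, Bad,    F0_H_num, V0_H_num };
    return std::make_pair(hi[ideal_reg], lo[ideal_reg]);  // (hi, lo), like OptoRegPair
  }

  int main() {
    std::pair<int, int> i = return_value_pair(Op_RegI);  // (Bad, V0): an int needs only V0
    std::pair<int, int> l = return_value_pair(Op_RegL);  // (V0_H, V0): a long takes the V0 pair
    std::pair<int, int> d = return_value_pair(Op_RegD);  // (F0_H, F0): a double takes the F0 pair
    std::printf("RegI=(%d,%d) RegL=(%d,%d) RegD=(%d,%d)\n",
                i.first, i.second, l.first, l.second, d.first, d.second);
    return 0;
  }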
+// Immediate Operands +// Integer Immediate +operand immI() %{ + match(ConI); + // TODO: should not match immI8 here LEE + match(immI8); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +operand immI8() %{ + predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI16() %{ + predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); + match(ConI); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M65536() %{ + predicate(n->get_int() == -65536); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for decrement +operand immI_M1() %{ + predicate(n->get_int() == -1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for test vs zero +operand immI_0() %{ + predicate(n->get_int() == 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for increment +operand immI_1() %{ + predicate(n->get_int() == 1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constants for increment +operand immI_16() %{ + predicate(n->get_int() == 16); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() %{ + predicate(n->get_int() == 24); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +// Constant for long shifts +operand immI_32() %{ + predicate(n->get_int() == 32); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for byte-wide masking +operand immI_255() %{ + predicate(n->get_int() == 255); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_65535() %{ + predicate(n->get_int() == 65535); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_MaxI() %{ + predicate(n->get_int() == 2147483647); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M32767_32768() %{ + predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); + match(ConI); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Valid scale values for addressing modes +operand immI_0_3() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_31() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 31); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_32767() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 32767); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_65535() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 65535); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_32_63() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive integer mask +operand immI_nonneg_mask() %{ + predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate +operand immL() %{ + match(ConL); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 8-bit +operand immL8() %{ + predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); + match(ConL); + + op_cost(5); + format %{ %} + 
interface(CONST_INTER); +%} + +operand immL16() %{ + predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 32-bit signed +operand immL32() %{ + predicate(n->get_long() == (int)(n->get_long())); + match(ConL); + + op_cost(15); + format %{ %} + interface(CONST_INTER); +%} + +// bit 3..6 zero +operand immL_M121() %{ + predicate(n->get_long() == -121L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..2 zero +operand immL_M8() %{ + predicate(n->get_long() == -8L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 1..2 zero +operand immL_M7() %{ + predicate(n->get_long() == -7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 2 zero +operand immL_M5() %{ + predicate(n->get_long() == -5L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..1 zero +operand immL_M4() %{ + predicate(n->get_long() == -4L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_M1() %{ + predicate(n->get_long() == -1L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate zero +operand immL_0() %{ + predicate(n->get_long() == 0L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_7() %{ + predicate(n->get_long() == 7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: low 32-bit mask +operand immL_MaxUI() %{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(20); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_M32767_32768() %{ + predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_0_65535() %{ + predicate(n->get_long() >= 0 && n->get_long() <= 65535); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive long mask +operand immL_nonneg_mask() %{ + predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immP() %{ + match(ConP); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immP_0() %{ + predicate(n->get_ptr() == 0); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate: 64-bit +operand immP_no_oop_cheap() %{ + predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer for polling page +operand immP_poll() %{ + predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); + match(ConP); + op_cost(5); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immN() %{ + match(ConN); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() %{ + match(ConNKlass); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immN_0() %{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// 
Single-precision floating-point immediate +operand immF() %{ + match(ConF); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Single-precision floating-point zero +operand immF_0() %{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point immediate +operand immD() %{ + match(ConD); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point zero +operand immD_0() %{ + predicate(jlong_cast(n->getd()) == 0); + match(ConD); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Register Operands +// Integer Register +operand mRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegI); + + format %{ %} + interface(REG_INTER); +%} + +operand no_Ax_mRegI() %{ + constraint(ALLOC_IN_RC(no_Ax_int_reg)); + match(RegI); + match(mRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand mS0RegI() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegI); + match(mRegI); + + format %{ "S0" %} + interface(REG_INTER); +%} + +operand mS1RegI() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegI); + match(mRegI); + + format %{ "S1" %} + interface(REG_INTER); +%} + +operand mS2RegI() %{ + constraint(ALLOC_IN_RC(s2_reg)); + match(RegI); + match(mRegI); + + format %{ "S2" %} + interface(REG_INTER); +%} + +operand mS3RegI() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegI); + match(mRegI); + + format %{ "S3" %} + interface(REG_INTER); +%} + +operand mS4RegI() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegI); + match(mRegI); + + format %{ "S4" %} + interface(REG_INTER); +%} + +operand mS5RegI() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegI); + match(mRegI); + + format %{ "S5" %} + interface(REG_INTER); +%} + +operand mS6RegI() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegI); + match(mRegI); + + format %{ "S6" %} + interface(REG_INTER); +%} + +operand mS7RegI() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegI); + match(mRegI); + + format %{ "S7" %} + interface(REG_INTER); +%} + + +operand mT0RegI() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegI); + match(mRegI); + + format %{ "T0" %} + interface(REG_INTER); +%} + +operand mT1RegI() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegI); + match(mRegI); + + format %{ "T1" %} + interface(REG_INTER); +%} + +operand mT2RegI() %{ + constraint(ALLOC_IN_RC(t2_reg)); + match(RegI); + match(mRegI); + + format %{ "T2" %} + interface(REG_INTER); +%} + +operand mT3RegI() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegI); + match(mRegI); + + format %{ "T3" %} + interface(REG_INTER); +%} + +operand mT8RegI() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegI); + match(mRegI); + + format %{ "T8" %} + interface(REG_INTER); +%} + +operand mT9RegI() %{ + constraint(ALLOC_IN_RC(t9_reg)); + match(RegI); + match(mRegI); + + format %{ "T9" %} + interface(REG_INTER); +%} + +operand mA0RegI() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegI); + match(mRegI); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand mA1RegI() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegI); + match(mRegI); + + format %{ "A1" %} + interface(REG_INTER); +%} + +operand mA2RegI() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegI); + match(mRegI); + + format %{ "A2" %} + interface(REG_INTER); +%} + +operand mA3RegI() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegI); + match(mRegI); + + format %{ "A3" %} + interface(REG_INTER); +%} + +operand mA4RegI() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegI); + 
match(mRegI); + + format %{ "A4" %} + interface(REG_INTER); +%} + +operand mA5RegI() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegI); + match(mRegI); + + format %{ "A5" %} + interface(REG_INTER); +%} + +operand mA6RegI() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegI); + match(mRegI); + + format %{ "A6" %} + interface(REG_INTER); +%} + +operand mA7RegI() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegI); + match(mRegI); + + format %{ "A7" %} + interface(REG_INTER); +%} + +operand mV0RegI() %{ + constraint(ALLOC_IN_RC(v0_reg)); + match(RegI); + match(mRegI); + + format %{ "V0" %} + interface(REG_INTER); +%} + +operand mV1RegI() %{ + constraint(ALLOC_IN_RC(v1_reg)); + match(RegI); + match(mRegI); + + format %{ "V1" %} + interface(REG_INTER); +%} + +operand mRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegN() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegN() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t2_RegN() %{ + constraint(ALLOC_IN_RC(t2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegN() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegN() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t9_RegN() %{ + constraint(ALLOC_IN_RC(t9_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegN() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegN() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegN() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegN() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a4_RegN() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a5_RegN() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegN() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegN() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s0_RegN() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegN() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s2_RegN() %{ + constraint(ALLOC_IN_RC(s2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s3_RegN() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegN() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegN() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegN); + match(mRegN); + + format %{ %} 
+ interface(REG_INTER); +%} + +operand s6_RegN() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegN() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegN() %{ + constraint(ALLOC_IN_RC(v0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand v1_RegN() %{ + constraint(ALLOC_IN_RC(v1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand mRegP() %{ + constraint(ALLOC_IN_RC(p_reg)); + match(RegP); + match(a0_RegP); + + format %{ %} + interface(REG_INTER); +%} + +operand no_T8_mRegP() %{ + constraint(ALLOC_IN_RC(no_T8_p_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s0_RegP() +%{ + constraint(ALLOC_IN_RC(s0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegP() +%{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s2_RegP() +%{ + constraint(ALLOC_IN_RC(s2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s3_RegP() +%{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegP() +%{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegP() +%{ + constraint(ALLOC_IN_RC(s5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s6_RegP() +%{ + constraint(ALLOC_IN_RC(s6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegP() +%{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegP() +%{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegP() +%{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t2_RegP() +%{ + constraint(ALLOC_IN_RC(t2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegP() +%{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegP() +%{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t9_RegP() +%{ + constraint(ALLOC_IN_RC(t9_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegP() +%{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegP() +%{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegP() +%{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegP); + match(mRegP); + 
match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegP() +%{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a4_RegP() +%{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + + +operand a5_RegP() +%{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegP() +%{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegP() +%{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegP() +%{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v1_RegP() +%{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +/* +operand mSPRegP(mRegP reg) %{ + constraint(ALLOC_IN_RC(sp_reg)); + match(reg); + + format %{ "SP" %} + interface(REG_INTER); +%} + +operand mFPRegP(mRegP reg) %{ + constraint(ALLOC_IN_RC(fp_reg)); + match(reg); + + format %{ "FP" %} + interface(REG_INTER); +%} +*/ + +operand mRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); + + format %{ %} + interface(REG_INTER); +%} + +operand v0RegL() %{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand v1RegL() %{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a0RegL() %{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegL); + match(mRegL); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand a1RegL() %{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a2RegL() %{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a3RegL() %{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t0RegL() %{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t1RegL() %{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t2RegL() %{ + constraint(ALLOC_IN_RC(t2_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t3RegL() %{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t8RegL() %{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a4RegL() %{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a5RegL() %{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a6RegL() %{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + 
+operand a7RegL() %{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s0RegL() %{ + constraint(ALLOC_IN_RC(s0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s1RegL() %{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s2RegL() %{ + constraint(ALLOC_IN_RC(s2_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s3RegL() %{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s4RegL() %{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s7RegL() %{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +// Floating register operands +operand regF() %{ + constraint(ALLOC_IN_RC(flt_reg)); + match(RegF); + + format %{ %} + interface(REG_INTER); +%} + +//Double Precision Floating register operands +operand regD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(RegD); + + format %{ %} + interface(REG_INTER); +%} + +//----------Memory Operands---------------------------------------------------- +// Indirect Memory Operand +operand indirect(mRegP reg) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(reg); + + format %{ "[$reg] @ indirect" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset8(mRegP reg, immL8 off) +%{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg off); + + op_cost(10); + format %{ "[$reg + $off (8-bit)] @ indOffset8" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Times Scale Plus Index Register +operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) +%{ + predicate(UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg (LShiftL lreg scale)); + + op_cost(10); + format %{"[$reg + $lreg << $scale] @ indIndexScale" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp(0x0); + %} +%} + + +// [base + index + offset] +operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) +%{ + predicate(UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(5); + match(AddP (AddP base index) off); + + format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} + interface(MEMORY_INTER) %{ + base($base); + index($index); + scale(0x0); + disp($off); + %} +%} + +// [base + index + offset] +operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) +%{ + predicate(UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(5); + match(AddP (AddP base (ConvI2L index)) off); + + format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8_convI2L" %} + interface(MEMORY_INTER) %{ + base($base); + index($index); + scale(0x0); + disp($off); + %} +%} + +// [base + index<<scale + offset] +operand basePosIndexScaleOffset8(mRegP base, mRegI index, immL8 off, immI_0_3 scale) +%{ + constraint(ALLOC_IN_RC(p_reg)); + predicate(UseLEXT1 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); + op_cost(10); + match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off); + + format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} + interface(MEMORY_INTER) %{ + base($base); + index($index); + scale($scale); + disp($off); + %} +%} + +//FIXME: I think it's better to limit the immI to be 16-bit at most!
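Each memory operand above only captures a (base, index, scale, disp) tuple; it is the enc_class bodies earlier in the file (load_F_enc, store_D_reg_enc and friends) that fold the tuple back into an effective address, spilling through the AT and T9 scratch registers whenever the displacement does not fit the 16-bit signed immediate of a plain load/store. The C++ sketch below models only that folding decision and deliberately omits the UseLEXT1 fused-load forms; the is_simm16 helper and the printed "plan" are illustrative, not the real MacroAssembler API.

  #include <cstdio>

  // A memory operand as the matcher hands it to an enc_class:
  // effective address = R[base] + (R[index] << scale) + disp.
  struct MemOperand { int base; int index; int scale; int disp; };

  // Same range test the encodings use before folding disp into the load.
  static bool is_simm16(int v) { return v >= -32768 && v <= 32767; }

  // Prints the address-formation steps instead of emitting instructions.
  static void plan_load(const MemOperand& m) {
    if (m.index != 0) {
      std::printf("AT = R%d + (R%d << %d); ", m.base, m.index, m.scale);
      if (is_simm16(m.disp)) {
        std::printf("load [AT + %d]\n", m.disp);                        // disp folds into the load
      } else {
        std::printf("T9 = %d; AT = AT + T9; load [AT + 0]\n", m.disp);  // disp materialized in T9
      }
    } else {
      if (is_simm16(m.disp)) {
        std::printf("load [R%d + %d]\n", m.base, m.disp);
      } else {
        std::printf("T9 = %d; AT = R%d + T9; load [AT + 0]\n", m.disp, m.base);
      }
    }
  }

  int main() {
    plan_load({ 5, 6, 3, 16 });      // indexed, small disp: one scratch add, folded disp
    plan_load({ 5, 0, 0, 0x12345 }); // no index, large disp: disp goes through T9
    return 0;
  }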
+// Indirect Memory Plus Long Offset Operand +operand indOffset32(mRegP reg, immL32 off) %{ + constraint(ALLOC_IN_RC(p_reg)); + op_cost(20); + match(AddP reg off); + + format %{ "[$reg + $off (32-bit)] @ indOffset32" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register +operand indIndex(mRegP addr, mRegL index) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP addr index); + + op_cost(20); + format %{"[$addr + $index] @ indIndex" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale(0x0); + disp(0x0); + %} +%} + +operand indirectNarrowKlass(mRegN reg) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(DecodeNKlass reg); + + format %{ "[$reg] @ indirectNarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +operand indOffset8NarrowKlass(mRegN reg, immL8 off) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeNKlass reg) off); + + format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +operand indOffset32NarrowKlass(mRegN reg, immL32 off) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeNKlass reg) off); + + format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) +%{ + predicate(UseLEXT1); + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP (AddP (DecodeNKlass reg) lreg) off); + + op_cost(10); + format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp($off); + %} +%} + +operand indIndexNarrowKlass(mRegN reg, mRegL lreg) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP (DecodeNKlass reg) lreg); + + op_cost(10); + format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Operand +operand indirectNarrow(mRegN reg) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(DecodeN reg); + + format %{ "[$reg] @ indirectNarrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset8Narrow(mRegN reg, immL8 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeN reg) off); + + format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register Plus Offset Operand +operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) +%{ + predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP (AddP (DecodeN reg) lreg) off); + + op_cost(10); + format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + 
disp($off); + %} +%} + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. +// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. + +// Comparision Code +operand cmpOp() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + + +// Comparision Code +// Comparison Code, unsigned compare. Used by FP also, with +// C2 (unordered) turned into GT or LT already. The other bits +// C0 and C3 are turned into Carry & Zero flags. +operand cmpOpU() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + + +//----------Special Memory Operands-------------------------------------------- +// Stack Slot Operand - This operand is used for loading and storing temporary +// values on the stack where a match requires a value to +// flow through memory. +operand stackSlotP(sRegP reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotI(sRegI reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotF(sRegF reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotD(sRegD reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotL(sRegL reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + + +//------------------------OPERAND CLASSES-------------------------------------- +//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); +opclass memory( 
indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); + + +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architectures pipeline. + +pipeline %{ + + //----------ATTRIBUTES--------------------------------------------------------- + attributes %{ + fixed_size_instructions; // Fixed size instructions + branch_has_delay_slot; // branch have delay slot in gs2 + max_instructions_per_bundle = 1; // 1 instruction per bundle + max_bundles_per_cycle = 4; // Up to 4 bundles per cycle + bundle_unit_size=4; + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 16; // The processor fetches one line + instruction_fetch_units = 1; // of 16 bytes + + // List of nop instructions + nops( MachNop ); + %} + + //----------RESOURCES---------------------------------------------------------- + // Resources are the functional units available to the machine + + resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); + + //----------PIPELINE DESCRIPTION----------------------------------------------- + // Pipeline Description specifies the stages in the machine's pipeline + + // IF: fetch + // ID: decode + // RD: read + // CA: caculate + // WB: write back + // CM: commit + + pipe_desc(IF, ID, RD, CA, WB, CM); + + + //----------PIPELINE CLASSES--------------------------------------------------- + // Pipeline Classes describe the stages in which input and output are + // referenced by the hardware pipeline. + + //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ + single_instruction; + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+1; + DECODE : ID; + ALU : CA; + %} + + //No.19 Integer mult operation : dst <-- reg1 mult reg2 + pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+5; + DECODE : ID; + ALU2 : CA; + %} + + pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer div operation : dst <-- reg1 div reg2 + pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer mod operation : dst <-- reg1 mod reg2 + pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ + instruction_count(2); + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //no.16 load Long from memory : + pipe_class ialu_loadL(mRegL dst, memory mem) %{ + instruction_count(2); + mem : RD(read); + dst : WB(write)+5; + DECODE : ID; + MEM : RD; + %} + + //No.17 Store Long to Memory : + pipe_class ialu_storeL(mRegL src, memory mem) %{ + instruction_count(2); + mem : RD(read); + src : RD(read); + DECODE : 
ID; + MEM : RD; + %} + + //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ + single_instruction; + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.3 Integer move operation : dst <-- reg + pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.4 No instructions : do nothing + pipe_class empty( ) %{ + instruction_count(0); + %} + + //No.5 UnConditional branch : + pipe_class pipe_jump( label labl ) %{ + multiple_bundles; + DECODE : ID; + BR : RD; + %} + + //No.6 ALU Conditional branch : + pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + + //no.7 load integer from memory : + pipe_class ialu_loadI(mRegI dst, memory mem) %{ + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.8 Store Integer to Memory : + pipe_class ialu_storeI(mRegI src, memory mem) %{ + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + + //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 + pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + //No.22 Floating div operation : dst <-- reg1 div reg2 + pipe_class fpu_div(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + pipe_class fcvt_I2D(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class fcvt_D2I(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class pipe_mfc1(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD; + %} + + pipe_class pipe_mtc1(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD(5); + %} + + //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 + pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + //No.11 Load Floating from Memory : + pipe_class fpu_loadF(regF dst, memory mem) %{ + instruction_count(1); + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.12 Store Floating to Memory : + pipe_class fpu_storeF(regF src, memory mem) %{ + instruction_count(1); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.13 FPU Conditional branch : + pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + +//No.14 Floating FPU reg operation : dst <-- op reg + pipe_class fpu1_regF(regF dst, regF src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + pipe_class long_memory_op() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(30); + %} + + pipe_class simple_call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + BR : RD; + %} + + pipe_class call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + %} + + //FIXME: + //No.9 Piple slow : for multi-instructions + pipe_class pipe_slow( ) %{ + instruction_count(20); + force_serialization; + multiple_bundles; + fixed_latency(50); + %} + +%} + + + 
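The pipe_desc above names the six stages (IF, ID, RD, CA, WB, CM), and each pipe_class then records, per operand, the stage at which it is read or written plus any extra result latency (e.g. dst : WB(write)+5 for the multiply class). The rough C++ model below shows how such declarations can be turned into a producer-to-consumer stall; the stage numbering and the latency formula are simplifications for illustration, not the exact ADLC scheduling model.

  #include <algorithm>
  #include <cstdio>

  // The six stages from pipe_desc(IF, ID, RD, CA, WB, CM), numbered in order.
  enum Stage { IF, ID, RD, CA, WB, CM };

  // A pared-down pipe_class: where the result is written (plus extra cycles)
  // and where source operands are read.
  struct PipeClass {
    Stage write_stage;         // the stage in  dst : WB(write)
    int   extra_write_cycles;  // the "+N" in   dst : WB(write)+N
    Stage read_stage;          // the stage in  src : RD(read)
  };

  // Simplified rule: a consumer reading at RD must wait until the producer's
  // result becomes available at write_stage plus its extra cycles.
  static int dependence_latency(const PipeClass& producer, const PipeClass& consumer) {
    int available = static_cast<int>(producer.write_stage) + producer.extra_write_cycles;
    int needed    = static_cast<int>(consumer.read_stage);
    return std::max(1, available - needed);
  }

  int main() {
    PipeClass ialu_regI_regI = { WB, 1, RD };  // from:  dst : WB(write)+1
    PipeClass ialu_mult      = { WB, 5, RD };  // from:  dst : WB(write)+5
    std::printf("alu  -> alu latency ~%d cycles\n", dependence_latency(ialu_regI_regI, ialu_regI_regI));
    std::printf("mult -> alu latency ~%d cycles\n", dependence_latency(ialu_mult, ialu_regI_regI));
    return 0;
  }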
+//----------INSTRUCTIONS------------------------------------------------------- +// +// match -- States which machine-independent subtree may be replaced +// by this instruction. +// ins_cost -- The estimated cost of this instruction is used by instruction +// selection to identify a minimum cost tree of machine +// instructions that matches a tree of machine-independent +// instructions. +// format -- A string providing the disassembly for this instruction. +// The value of an instruction's operand may be inserted +// by referring to it with a '$' prefix. +// opcode -- Three instruction opcodes may be provided. These are referred +// to within an encode class as $primary, $secondary, and $tertiary +// respectively. The primary opcode is commonly used to +// indicate the type of machine instruction, while secondary +// and tertiary are often used for prefix options or addressing +// modes. +// ins_encode -- A list of encode classes with parameters. The encode class +// name must have been defined in an 'enc_class' specification +// in the encode section of the architecture description. + + +// Load Integer +instruct loadI(mRegI dst, memory mem) %{ + match(Set dst (LoadI mem)); + + ins_cost(125); + format %{ "lw $dst, $mem #@loadI" %} + ins_encode (load_I_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct loadI_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + + ins_cost(125); + format %{ "lw $dst, $mem #@loadI_convI2L" %} + ins_encode (load_I_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Integer (32 bit signed) to Byte (8 bit signed) +instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} + ins_encode(load_B_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) +instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} + ins_encode(load_UB_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Short (16 bit signed) +instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); + + ins_cost(125); + format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} + ins_encode(load_S_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) +instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} + ins_encode(load_C_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +// Load Long. 
+instruct loadL(mRegL dst, memory mem) %{ +// predicate(!((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + + ins_cost(250); + format %{ "ld $dst, $mem #@loadL" %} + ins_encode(load_L_enc(dst, mem)); + ins_pipe( ialu_loadL ); +%} + +// Load Long - UNaligned +instruct loadL_unaligned(mRegL dst, memory mem) %{ + match(Set dst (LoadL_unaligned mem)); + + // FIXME: Need more effective ldl/ldr + ins_cost(450); + format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} + ins_encode(load_L_enc(dst, mem)); + ins_pipe( ialu_loadL ); +%} + +// Store Long +instruct storeL_reg(memory mem, mRegL src) %{ + match(Set mem (StoreL mem src)); + + ins_cost(200); + format %{ "sd $mem, $src #@storeL_reg\n" %} + ins_encode(store_L_reg_enc(mem, src)); + ins_pipe( ialu_storeL ); +%} + +instruct storeL_immL_0(memory mem, immL_0 zero) %{ + match(Set mem (StoreL mem zero)); + + ins_cost(180); + format %{ "sd zero, $mem #@storeL_immL_0" %} + ins_encode(store_L_immL_0_enc(mem, zero)); + ins_pipe( ialu_storeL ); +%} + +instruct storeL_imm(memory mem, immL src) %{ + match(Set mem (StoreL mem src)); + + ins_cost(200); + format %{ "sd $src, $mem #@storeL_imm" %} + ins_encode(store_L_immL_enc(mem, src)); + ins_pipe( ialu_storeL ); +%} + +// Load Compressed Pointer +instruct loadN(mRegN dst, memory mem) +%{ + match(Set dst (LoadN mem)); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2P(mRegP dst, memory mem) +%{ + match(Set dst (DecodeN (LoadN mem))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# @ loadN2P" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Pointer +instruct loadP(mRegP dst, memory mem) %{ + match(Set dst (LoadP mem)); + + ins_cost(125); + format %{ "ld $dst, $mem #@loadP" %} + ins_encode (load_P_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Klass Pointer +instruct loadKlass(mRegP dst, memory mem) %{ + match(Set dst (LoadKlass mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadKlass" %} + ins_encode (load_P_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load narrow Klass Pointer +instruct loadNKlass(mRegN dst, memory mem) +%{ + match(Set dst (LoadNKlass mem)); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2PKlass(mRegP dst, memory mem) +%{ + match(Set dst (DecodeNKlass (LoadNKlass mem))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Constant +instruct loadConI(mRegI dst, immI src) %{ + match(Set dst src); + + ins_cost(150); + format %{ "mov $dst, $src #@loadConI" %} + ins_encode %{ + Register dst = $dst$$Register; + int value = $src$$constant; + __ move(dst, value); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct loadConL_set64(mRegL dst, immL src) %{ + match(Set dst src); + ins_cost(120); + format %{ "li $dst, $src @ loadConL_set64" %} + ins_encode %{ + __ set64($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct loadConL16(mRegL dst, immL16 src) %{ + match(Set dst src); + ins_cost(105); + format %{ "mov $dst, 
$src #@loadConL16" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + int value = $src$$constant; + __ daddiu(dst_reg, R0, value); + %} + ins_pipe( ialu_regL_regL ); +%} + + +instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ + match(Set dst src); + ins_cost(100); + format %{ "mov $dst, zero #@loadConL_immL_0" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + __ daddu(dst_reg, R0, R0); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Load Range +instruct loadRange(mRegI dst, memory mem) %{ + match(Set dst (LoadRange mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadRange" %} + ins_encode(load_I_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + + +instruct storeP(memory mem, mRegP src ) %{ + match(Set mem (StoreP mem src)); + + ins_cost(125); + format %{ "sd $src, $mem #@storeP" %} + ins_encode(store_P_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Store NULL Pointer, mark word, or other simple pointer constant. +instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ + match(Set mem (StoreP mem zero)); + + ins_cost(125); + format %{ "mov $mem, $zero #@storeImmP_immP_0" %} + ins_encode(store_P_immP0_enc(mem)); + ins_pipe( ialu_storeI ); +%} + +// Store Byte Immediate +instruct storeImmB(memory mem, immI8 src) %{ + match(Set mem (StoreB mem src)); + + ins_cost(150); + format %{ "movb $mem, $src #@storeImmB" %} + ins_encode(store_B_immI_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Store Compressed Pointer +instruct storeN(memory mem, mRegN src) +%{ + match(Set mem (StoreN mem src)); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} + ins_encode(store_N_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeP2N(memory mem, mRegP src) +%{ + match(Set mem (StoreN mem (EncodeP src))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# @ storeP2N" %} + ins_encode(store_N_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeNKlass(memory mem, mRegN src) +%{ + match(Set mem (StoreNKlass mem src)); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} + ins_encode(store_N_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeP2NKlass(memory mem, mRegP src) +%{ + match(Set mem (StoreNKlass mem (EncodePKlass src))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# @ storeP2NKlass" %} + ins_encode(store_N_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeImmN_immN_0(memory mem, immN_0 zero) +%{ + match(Set mem (StoreN mem zero)); + + ins_cost(125); // XXX + format %{ "storeN0 zero, $mem\t# compressed ptr" %} + ins_encode(storeImmN0_enc(mem, zero)); + ins_pipe( ialu_storeI ); +%} + +// Store Byte +instruct storeB(memory mem, mRegI src) %{ + match(Set mem (StoreB mem src)); + + ins_cost(125); + format %{ "sb $src, $mem #@storeB" %} + ins_encode(store_B_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeB_convL2I(memory mem, mRegL src) %{ + match(Set mem (StoreB mem (ConvL2I src))); + + ins_cost(125); + format %{ "sb $src, $mem #@storeB_convL2I" %} + ins_encode(store_B_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Load Byte (8bit signed) +instruct loadB(mRegI dst, memory mem) %{ + match(Set dst (LoadB mem)); + + ins_cost(125); + format %{ "lb $dst, $mem #@loadB" %} + ins_encode(load_B_enc(dst, mem)); + 
ins_pipe( ialu_loadI ); +%} + +instruct loadB_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadB mem))); + + ins_cost(125); + format %{ "lb $dst, $mem #@loadB_convI2L" %} + ins_encode(load_B_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Byte (8bit UNsigned) +instruct loadUB(mRegI dst, memory mem) %{ + match(Set dst (LoadUB mem)); + + ins_cost(125); + format %{ "lbu $dst, $mem #@loadUB" %} + ins_encode(load_UB_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct loadUB_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(125); + format %{ "lbu $dst, $mem #@loadUB_convI2L" %} + ins_encode(load_UB_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Short (16bit signed) +instruct loadS(mRegI dst, memory mem) %{ + match(Set dst (LoadS mem)); + + ins_cost(125); + format %{ "lh $dst, $mem #@loadS" %} + ins_encode(load_S_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Short (16 bit signed) to Byte (8 bit signed) +instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} + ins_encode(load_B_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +instruct loadS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(125); + format %{ "lh $dst, $mem #@loadS_convI2L" %} + ins_encode(load_S_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Store Integer Immediate +instruct storeImmI(memory mem, immI src) %{ + match(Set mem (StoreI mem src)); + + ins_cost(150); + format %{ "mov $mem, $src #@storeImmI" %} + ins_encode(store_I_immI_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Store Integer +instruct storeI(memory mem, mRegI src) %{ + match(Set mem (StoreI mem src)); + + ins_cost(125); + format %{ "sw $mem, $src #@storeI" %} + ins_encode(store_I_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeI_convL2I(memory mem, mRegL src) %{ + match(Set mem (StoreI mem (ConvL2I src))); + + ins_cost(125); + format %{ "sw $mem, $src #@storeI_convL2I" %} + ins_encode(store_I_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Load Float +instruct loadF(regF dst, memory mem) %{ + match(Set dst (LoadF mem)); + + ins_cost(150); + format %{ "loadF $dst, $mem #@loadF" %} + ins_encode(load_F_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct loadConP_general(mRegP dst, immP src) %{ + match(Set dst src); + + ins_cost(120); + format %{ "li $dst, $src #@loadConP_general" %} + + ins_encode %{ + Register dst = $dst$$Register; + long* value = (long*)$src$$constant; + + if($src->constant_reloc() == relocInfo::metadata_type){ + int klass_index = __ oop_recorder()->find_index((Klass*)value); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + + __ relocate(rspec); + __ patchable_set48(dst, (long)value); + } else if($src->constant_reloc() == relocInfo::oop_type){ + int oop_index = __ oop_recorder()->find_index((jobject)value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + __ relocate(rspec); + __ patchable_set48(dst, (long)value); + } else if ($src->constant_reloc() == relocInfo::none) { + __ set64(dst, (long)value); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ + match(Set dst src); + + ins_cost(80); + format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} + + ins_encode %{ + __ set64($dst$$Register, $src$$constant); + %} + + 
ins_pipe(ialu_regI_regI); +%} + + +instruct loadConP_poll(mRegP dst, immP_poll src) %{ + match(Set dst src); + + ins_cost(50); + format %{ "li $dst, $src #@loadConP_poll" %} + + ins_encode %{ + Register dst = $dst$$Register; + intptr_t value = (intptr_t)$src$$constant; + + __ set64(dst, (jlong)value); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_immP_0(mRegP dst, immP_0 src) +%{ + match(Set dst src); + + ins_cost(50); + format %{ "mov $dst, R0\t# ptr" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + __ daddu(dst_reg, R0, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ + match(Set dst src); + format %{ "move $dst, R0\t# compressed NULL ptr" %} + ins_encode %{ + __ move($dst$$Register, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN(mRegN dst, immN src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_oop(dst, (jobject)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +instruct loadConNKlass(mRegN dst, immNKlass src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_klass(dst, (Klass*)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +//FIXME +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. +instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ + match(TailCall jump_target method_oop ); + ins_cost(300); + format %{ "JMP $jump_target \t# @TailCalljmpInd" %} + + ins_encode %{ + Register target = $jump_target$$Register; + Register oop = $method_oop$$Register; + + // RA will be used in generate_forward_exception() + __ push(RA); + + __ move(S3, oop); + __ jr(target); + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is setup +// just prior to jumping to this handler. No code emitted. +instruct CreateException( a0_RegP ex_oop ) +%{ + match(Set ex_oop (CreateEx)); + + // use the following format syntax + format %{ "# exception oop is in A0; no code emitted @CreateException" %} + ins_encode %{ + // X86 leaves this function empty + __ block_comment("CreateException is empty in MIPS"); + %} + ins_pipe( empty ); +// ins_pipe( pipe_jump ); +%} + + +/* The mechanism of exception handling is clear now. + +- Common try/catch: + [stubGenerator_mips.cpp] generate_forward_exception() + |- V0, V1 are created + |- T9 <= SharedRuntime::exception_handler_for_return_address + `- jr T9 + `- the caller's exception_handler + `- jr OptoRuntime::exception_blob + `- here +- Rethrow(e.g. 
'unwind'): + * The callee: + |- an exception is triggered during execution + `- exits the callee method through RethrowException node + |- The callee pushes exception_oop(T0) and exception_pc(RA) + `- The callee jumps to OptoRuntime::rethrow_stub() + * In OptoRuntime::rethrow_stub: + |- The VM calls _rethrow_Java to determine the return address in the caller method + `- exits the stub with tailjmpInd + |- pops exception_oop(V0) and exception_pc(V1) + `- jumps to the return address(usually an exception_handler) + * The caller: + `- continues processing the exception_blob with V0/V1 +*/ + +// Rethrow exception: +// The exception oop will come in the first argument position. +// Then JUMP (not call) to the rethrow stub code. +instruct RethrowException() +%{ + match(Rethrow); + + // use the following format syntax + format %{ "JMP rethrow_stub #@RethrowException" %} + ins_encode %{ + __ block_comment("@ RethrowException"); + + cbuf.set_insts_mark(); + cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); + + // call OptoRuntime::rethrow_stub to get the exception handler in parent method + __ patchable_jump((address)OptoRuntime::rethrow_stub()); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Branch Instructions --- long offset versions + +// Jump Direct +instruct jmpDir_long(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_long" %} + + ins_encode %{ + Label* L = $labl$$label; + __ jmp_far(*L); + %} + + ins_pipe( pipe_jump ); + //ins_pc_relative(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ slt(AT, op2, op1); + __ bne_long(AT, R0, *L); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ slt(AT, op1, op2); + __ bne_long(AT, R0, *L); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = AT; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + __ move(op2, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ slt(AT, op2, op1); + __ bne_long(AT, R0, *L); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ slt(AT, op1, op2); + __ bne_long(AT, R0, *L); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + 
%} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} + + ins_encode %{ + Label* L = $labl$$label; + switch($cop$$cmpcode) { + case 0x01: //equal + __ bne_long($cr$$Register, R0, *L); + break; + case 0x02: //not equal + __ beq_long($cr$$Register, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +// Conditional jumps +instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) + { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! 
compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_long" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_long" %} + ins_encode %{ + Register op1_reg = $op1$$Register; + Register op2_reg = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1_reg, op2_reg, *L); + break; + case 0x02: //not_equal + __ bne_long(op1_reg, op2_reg, *L); + break; + case 0x03: //above + __ sltu(AT, op2_reg, op1_reg); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1_reg, op2_reg); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1_reg, op2_reg); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ sltu(AT, op2_reg, op1_reg); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + __ bne_long(AT, R0, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + __ bne_long(AT, R0, *L); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, AT, *L); + break; + case 0x03: //above + __ sltu(AT, AT, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1, AT); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1, AT); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ sltu(AT, AT, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl 
#@branchConI_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ slt(AT, op2, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ slt(AT, op1, op2); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, R0, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, R0, *L); + break; + case 0x03: //greater + __ slt(AT, R0, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //greater_equal + __ slt(AT, op1, R0); + __ beq_long(AT, R0, *L); + break; + case 0x05: //less + __ slt(AT, op1, R0); + __ bne_long(R0, AT, *L); + break; + case 0x06: //less_equal + __ slt(AT, R0, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, AT, *L); + break; + case 0x03: //greater + __ slt(AT, AT, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //greater_equal + __ slt(AT, op1, AT); + __ beq_long(AT, R0, *L); + break; + case 0x05: //less + __ slt(AT, op1, AT); + __ bne_long(R0, AT, *L); + break; + case 0x06: //less_equal + __ slt(AT, AT, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, R0, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, R0, *L); + break; + case 0x03: //above + __ bne_long(R0, op1, *L); + break; + case 0x04: //above_equal + __ beq_long(R0, R0, *L); + break; + case 0x05: //below + return; + break; + case 0x06: //below_equal + __ beq_long(op1, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); 
+ ins_cost(180); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ move(AT, val); + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ move(AT, val); + __ bne_long(op1, AT, *L); + break; + case 0x03: //above + __ move(AT, val); + __ sltu(AT, AT, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltiu(AT, op1, val); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltiu(AT, op1, val); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ move(AT, val); + __ sltu(AT, AT, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: // not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); +%} + +instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = R0; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + 
case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match(If cmp (CmpUL src1 zero)); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = R0; + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + case 0x04: // greater_equal + case 0x06: // less_equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: // not_equal + case 0x03: // greater + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x05: // less + __ beq_long(R0, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); +%} + +instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: // equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: // not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + 
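// All of the long-offset compare-and-branch rules above (branchConI*,
// branchConIU*, branchConL*, branchConUL*) expand the same way: slt (signed)
// or sltu (unsigned) leaves 1 in AT when its first operand is strictly less
// than its second, and each of the six cmpcode cases then reduces to a single
// beq_long/bne_long against R0; the immediate forms typically materialize the
// constant into AT first with move/set64 (the 16-bit forms can use sltiu
// directly). The C++ below is only a minimal sketch of that mapping, written
// to make the switch bodies easier to audit; emit_cmp_branch is a
// hypothetical helper, not part of this patch, and it assumes the
// MacroAssembler interface already used in the ins_encode blocks.
static void emit_cmp_branch(MacroAssembler* _masm, int cmpcode, Register op1,
                            Register op2, Label& L, bool is_signed) {
  // AT is the scratch register used throughout this file; R0 is the zero register.
  switch (cmpcode) {
  case 0x01: _masm->beq_long(op1, op2, L); break;   // equal
  case 0x02: _masm->bne_long(op1, op2, L); break;   // not_equal
  case 0x03:                                        // greater / above
    if (is_signed) _masm->slt(AT, op2, op1); else _masm->sltu(AT, op2, op1);
    _masm->bne_long(AT, R0, L);
    break;
  case 0x04:                                        // greater_equal / above_equal
    if (is_signed) _masm->slt(AT, op1, op2); else _masm->sltu(AT, op1, op2);
    _masm->beq_long(AT, R0, L);
    break;
  case 0x05:                                        // less / below
    if (is_signed) _masm->slt(AT, op1, op2); else _masm->sltu(AT, op1, op2);
    _masm->bne_long(AT, R0, L);
    break;
  case 0x06:                                        // less_equal / below_equal
    if (is_signed) _masm->slt(AT, op2, op1); else _masm->sltu(AT, op2, op1);
    _masm->beq_long(AT, R0, L);
    break;
  default:
    Unimplemented();
  }
}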
+ ins_pc_relative(1); + ins_pipe(pipe_alu_branch); +%} + +//FIXME +instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_s(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: // not_equal + __ c_eq_s(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: // greater + __ c_ule_s(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: // greater_equal + __ c_ult_s(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: // less + __ c_ult_s(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: // less_equal + __ c_ule_s(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + +instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_d(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: // not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ c_eq_d(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: // greater + __ c_ule_d(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: // greater_equal + __ c_ult_d(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: // less + __ c_ult_d(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: // less_equal + __ c_ule_d(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + + +// ============================================================================ +// Branch Instructions -- short offset versions + +// Jump Direct +instruct jmpDir_short(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + if(&L) + __ b(L); + else + __ b(int(0)); + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ slt(AT, op2, op1); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + 
else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ slt(AT, op1, op2); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = AT; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + __ move(op2, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ slt(AT, op2, op1); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ slt(AT, op1, op2); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + switch($cop$$cmpcode) { + case 0x01: //equal + if (&L) + __ bne($cr$$Register, R0, L); + else + __ bne($cr$$Register, R0, (int)0); + break; + case 0x02: //not equal + if (&L) + __ beq($cr$$Register, R0, L); + else + __ beq($cr$$Register, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Conditional jumps +instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + 
Register op2 = R0; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) + { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_short" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_short" %} + ins_encode %{ + Register op1_reg = $op1$$Register; + Register op2_reg = $op2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1_reg, op2_reg, L); + else + __ beq(op1_reg, op2_reg, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1_reg, op2_reg, L); + else + __ bne(op1_reg, op2_reg, (int)0); + break; + case 0x03: //above + __ sltu(AT, op2_reg, op1_reg); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1_reg, op2_reg); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1_reg, op2_reg); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, op2_reg, op1_reg); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //above + __ sltu(AT, AT, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1, AT); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1, AT); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, AT, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + 
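// The short-offset rules in this part of the file repeat the long-offset
// rules above, but use the native 16-bit-displacement branches (beq/bne,
// bgtz/bgez/bltz/blez, bc1t/bc1f) followed by the delayed()->nop() this file
// emits after every short branch, and they are marked ins_short_branch(1) so
// the compiler's branch-shortening pass can substitute them for the _long
// forms when the target is within range. The `if (&L) ... else ... (int)0`
// guard wrapping each branch appears to tolerate a null label pointer (for
// example while code size is still being estimated): with no target yet, a
// branch with displacement 0 is emitted purely to reserve the slot. The C++
// below is a minimal sketch of that shared idiom; emit_short_beq is a
// hypothetical helper, not part of this patch, and the reading of the
// null-label case is an assumption rather than something stated in the code.
static void emit_short_beq(MacroAssembler* _masm, Register op1, Register op2,
                           Label* L) {
  if (L != NULL) {
    _masm->beq(op1, op2, *L);      // target known: short pc-relative branch
  } else {
    _masm->beq(op1, op2, (int)0);  // no label yet: placeholder displacement
  }
  _masm->delayed()->nop();         // the nop this file emits after each short branch
}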
+instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ slt(AT, op2, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ slt(AT, op1, op2); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, R0, L); + else + __ beq(op1, R0, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, R0, L); + else + __ bne(op1, R0, (int)0); + break; + case 0x03: //greater + if(&L) + __ bgtz(op1, L); + else + __ bgtz(op1, (int)0); + break; + case 0x04: //greater_equal + if(&L) + __ bgez(op1, L); + else + __ bgez(op1, (int)0); + break; + case 0x05: //less + if(&L) + __ bltz(op1, L); + else + __ bltz(op1, (int)0); + break; + case 0x06: //less_equal + if(&L) + __ blez(op1, L); + else + __ blez(op1, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //greater + __ slt(AT, AT, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //greater_equal + __ slt(AT, op1, AT); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //less + __ slt(AT, op1, AT); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //less_equal + __ slt(AT, AT, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_immI_0_short(cmpOpU 
cmp, mRegI src1, immI_0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, R0, L); + else + __ beq(op1, R0, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, R0, L); + else + __ bne(op1, R0, (int)0); + break; + case 0x03: //above + if(&L) + __ bne(R0, op1, L); + else + __ bne(R0, op1, (int)0); + break; + case 0x04: //above_equal + if(&L) + __ beq(R0, R0, L); + else + __ beq(R0, R0, (int)0); + break; + case 0x05: //below + return; + break; + case 0x06: //below_equal + if(&L) + __ beq(op1, R0, L); + else + __ beq(op1, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + ins_cost(180); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ move(AT, val); + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + __ move(AT, val); + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //above + __ move(AT, val); + __ sltu(AT, AT, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltiu(AT, op1, val); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltiu(AT, op1, val); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ move(AT, val); + __ sltu(AT, AT, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + + break; + + case 0x06: //less_equal + 
__ slt(AT, opr2_reg, opr1_reg); + + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x02: // not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + break; + + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, R0, target); + else + __ beq(opr1_reg, R0, int(0)); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, R0, target); + else + __ bne(opr1_reg, R0, (int)0); + break; + + case 0x03: //greater + if(&target) + __ bgtz(opr1_reg, target); + else + __ bgtz(opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if(&target) + __ bgez(opr1_reg, target); + else + __ bgez(opr1_reg, (int)0); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, R0); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ blez(opr1_reg, target); + else + __ blez(opr1_reg, int(0)); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match(If cmp (CmpUL src1 zero)); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + case 0x04: // greater_equal + case 0x06: // less_equal + if (&target) + __ beq(opr1_reg, R0, 
target); + else + __ beq(opr1_reg, R0, int(0)); + break; + + case 0x02: // not_equal + case 0x03: // greater + if(&target) + __ bne(opr1_reg, R0, target); + else + __ bne(opr1_reg, R0, (int)0); + break; + + case 0x05: // less + if(&target) + __ beq(R0, R0, target); + else + __ beq(R0, R0, (int)0); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: // equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + + case 0x02: // not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +//FIXME +instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} + + ins_encode %{ 
+ FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label& L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_s(reg_op1, reg_op2); + if (&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x02: // not_equal + __ c_eq_s(reg_op1, reg_op2); + if (&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x03: // greater + __ c_ule_s(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x04: // greater_equal + __ c_ult_s(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x05: // less + __ c_ult_s(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x06: // less_equal + __ c_ule_s(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label& L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_d(reg_op1, reg_op2); + if (&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x02: // not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ c_eq_d(reg_op1, reg_op2); + if (&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x03: // greater + __ c_ule_d(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x04: // greater_equal + __ c_ult_d(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x05: // less + __ c_ult_d(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x06: // less_equal + __ c_ule_d(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +// =================== End of branch instructions ========================== + +// Call Runtime Instruction +instruct CallRuntimeDirect(method meth) %{ + match(CallRuntime ); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,runtime #@CallRuntimeDirect" %} + ins_encode( Java_To_Runtime( meth ) ); + ins_pipe( pipe_slow ); + ins_alignment(16); +%} + + + +//------------------------MemBar Instructions------------------------------- +//Memory barrier flavors + +instruct membar_acquire() %{ + match(MemBarAcquire); + ins_cost(400); + + format %{ "MEMBAR-acquire @ membar_acquire" %} + ins_encode %{ + __ sync(); + %} + ins_pipe(empty); +%} + +instruct load_fence() %{ + match(LoadFence); + ins_cost(400); + + format %{ "MEMBAR @ load_fence" %} + ins_encode %{ + __ sync(); + %} + ins_pipe(pipe_slow); +%} + +instruct membar_acquire_lock() +%{ + match(MemBarAcquireLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} + ins_encode(); + ins_pipe(empty); +%} + +instruct membar_release() %{ + match(MemBarRelease); + ins_cost(400); + + format %{ "MEMBAR-release @ membar_release" 
%} + + ins_encode %{ + // Attention: DO NOT DELETE THIS GUY! + __ sync(); + %} + + ins_pipe(pipe_slow); +%} + +instruct store_fence() %{ + match(StoreFence); + ins_cost(400); + + format %{ "MEMBAR @ store_fence" %} + + ins_encode %{ + __ sync(); + %} + + ins_pipe(pipe_slow); +%} + +instruct membar_release_lock() +%{ + match(MemBarReleaseLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} + ins_encode(); + ins_pipe(empty); +%} + + +instruct membar_volatile() %{ + match(MemBarVolatile); + ins_cost(400); + + format %{ "MEMBAR-volatile" %} + ins_encode %{ + if( !os::is_MP() ) return; // Not needed on single CPU + __ sync(); + + %} + ins_pipe(pipe_slow); +%} + +instruct unnecessary_membar_volatile() %{ + match(MemBarVolatile); + predicate(Matcher::post_store_load_barrier(n)); + ins_cost(0); + + size(0); + format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} + ins_encode( ); + ins_pipe(empty); +%} + +instruct membar_storestore() %{ + match(MemBarStoreStore); + + ins_cost(400); + format %{ "MEMBAR-storestore @ membar_storestore" %} + ins_encode %{ + __ sync(); + %} + ins_pipe(empty); +%} + +//----------Move Instructions-------------------------------------------------- +instruct castX2P(mRegP dst, mRegL src) %{ + match(Set dst (CastX2P src)); + format %{ "castX2P $dst, $src @ castX2P" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_cost(10); + ins_pipe( ialu_regI_mov ); +%} + +instruct castP2X(mRegL dst, mRegP src ) %{ + match(Set dst (CastP2X src)); + + format %{ "mov $dst, $src\t #@castP2X" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_pipe( ialu_regI_mov ); +%} + +instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ mfc1(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ + match(Set dst (MoveI2F src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ mtc1(src, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ dmfc1(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + Register src = as_Register($src$$reg); + + __ dmtc1(src, dst); + %} + ins_pipe( pipe_slow ); +%} + +//----------Conditional Move--------------------------------------------------- +// Conditional move +instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + 
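+  // Note on the CMove* rules in this section: they all funnel into the
+  // MacroAssembler::cmp_cmov() helper, which compares the two tmp operands
+  // under the given condition and, when it holds, moves src into dst;
+  // otherwise dst keeps its old value (dst is both input and output, hence
+  // the (Binary dst src) operand).  Roughly, a source-level shape such as
+  //   int r = (a < b) ? x : r;
+  // can match here with cop = less, tmp1 = a, tmp2 = b and src = x.
+  // The trailing bool argument is overloaded: for integer/pointer compares it
+  // selects signed (true) vs. unsigned (false) comparison, while for the
+  // floating-point flavors it appears to select single (true) vs. double
+  // (false) precision.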
format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set 
dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* 
is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = 
$dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" + %} + + ins_encode %{ + 
FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +//FIXME +instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = 
$tmp2$$FloatRegister;
+    FloatRegister dst = $dst$$FloatRegister;
+    FloatRegister src = $src$$FloatRegister;
+    int flag = $cop$$cmpcode;
+
+    __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Manifest a CmpL result in an integer register. Very painful.
+// This is the test to avoid.
+instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{
+  match(Set dst (CmpL3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %}
+  ins_encode %{
+    Register opr1 = as_Register($src1$$reg);
+    Register opr2 = as_Register($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    Label Done;
+
+    __ subu(AT, opr1, opr2);
+    __ bltz(AT, Done);
+    __ delayed()->daddiu(dst, R0, -1);
+
+    __ move(dst, 1);
+    __ movz(dst, R0, AT);
+
+    __ bind(Done);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+//
+// less_result = -1
+// greater_result = 1
+// equal_result = 0
+// nan_result = -1
+//
+instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{
+  match(Set dst (CmpF3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = as_FloatRegister($src1$$reg);
+    FloatRegister src2 = as_FloatRegister($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    Label Done;
+
+    __ c_ult_s(src1, src2);
+    __ bc1t(Done);
+    __ delayed()->daddiu(dst, R0, -1);
+
+    __ c_eq_s(src1, src2);
+    __ move(dst, 1);
+    __ movt(dst, R0);
+
+    __ bind(Done);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{
+  match(Set dst (CmpD3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = as_FloatRegister($src1$$reg);
+    FloatRegister src2 = as_FloatRegister($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    Label Done;
+
+    __ c_ult_d(src1, src2);
+    __ bc1t(Done);
+    __ delayed()->daddiu(dst, R0, -1);
+
+    __ c_eq_d(src1, src2);
+    __ move(dst, 1);
+    __ movt(dst, R0);
+
+    __ bind(Done);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{
+  match(Set dummy (ClearArray cnt base));
+  format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
+  ins_encode %{
+    // cnt is the number of 8-byte doublewords in the array to be cleared,
+    // and base points to the starting address of the array.
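+    // In effect the loop below is (minimal C sketch of the generated code):
+    //   for (jlong* p = (jlong*)base; cnt != 0; cnt--) *p++ = 0;
+    // one sd of R0 per 8-byte word, stepping the cursor by wordSize each pass.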
+ Register base = $base$$Register; + Register num = $cnt$$Register; + Label Loop, done; + + __ beq(num, R0, done); + __ delayed()->daddu(AT, base, R0); + + __ move(T9, num); /* T9 = words */ + + __ bind(Loop); + __ sd(R0, AT, 0); + __ daddiu(T9, T9, -1); + __ bne(T9, R0, Loop); + __ delayed()->daddiu(AT, AT, wordSize); + + __ bind(done); + %} + ins_pipe( pipe_slow ); +%} + +instruct string_compare(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compare" %} + ins_encode %{ + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + Register str1 = $str1$$Register; + Register str2 = $str2$$Register; + Register cnt1 = $cnt1$$Register; + Register cnt2 = $cnt2$$Register; + Register result = $result$$Register; + + Label L, Loop, haveResult, done; + + // compute the and difference of lengths (in result) + __ subu(result, cnt1, cnt2); // result holds the difference of two lengths + + // compute the shorter length (in cnt1) + __ slt(AT, cnt2, cnt1); + __ movn(cnt1, cnt2, AT); + + // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register + __ bind(Loop); // Loop begin + __ beq(cnt1, R0, done); + __ delayed()->nop(); + __ lhu(AT, str1, 0); + + // compare current character + __ lhu(cnt2, str2, 0); + __ bne(AT, cnt2, haveResult); + __ delayed()->addiu(str1, str1, 2); + __ addiu(str2, str2, 2); + __ b(Loop); + __ delayed()->addiu(cnt1, cnt1, -1); // Loop end + + __ bind(haveResult); + __ subu(result, AT, cnt2); + + __ bind(done); + %} + + ins_pipe( pipe_slow ); +%} + +// intrinsic optimization +instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); + + format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} + ins_encode %{ + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + Register str1 = $str1$$Register; + Register str2 = $str2$$Register; + Register cnt = $cnt$$Register; + Register tmp = $temp$$Register; + Register result = $result$$Register; + + Label Loop, True, False; + + __ beq(str1, str2, True); // same char[] ? 
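+    // The daddiu below sits in the beq's branch delay slot: on MIPS it runs
+    // whether or not the branch is taken, so result is pre-loaded with 1
+    // (true) before we either fall through or jump to True.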
+ __ delayed()->daddiu(result, R0, 1); + + __ beq(cnt, R0, True); + __ delayed()->nop(); // count == 0 + + __ bind(Loop); + + // compare current character + __ lhu(AT, str1, 0); + __ lhu(tmp, str2, 0); + __ bne(AT, tmp, False); + __ delayed()->addiu(str1, str1, 2); + __ addiu(cnt, cnt, -1); + __ bne(cnt, R0, Loop); + __ delayed()->addiu(str2, str2, 2); + + __ b(True); + __ delayed()->nop(); + + __ bind(False); + __ daddiu(result, R0, 0); + + __ bind(True); + %} + + ins_pipe( pipe_slow ); +%} + +//----------Arithmetic Instructions------------------------------------------- +//----------Addition Instructions--------------------------------------------- +instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "addu $dst, $src1, $src2 #@addI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ addu32(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "addu $dst, $src1, $src2 #@addI_Reg_imm" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int imm = $src2$$constant; + + if(Assembler::is_simm16(imm)) { + __ addiu32(dst, src1, imm); + } else { + __ move(AT, imm); + __ addu32(dst, src1, AT); + } + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "daddu $dst, $src1, $src2 #@addP_reg_reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ daddu(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ + match(Set dst (AddP src1 (ConvI2L src2))); + + format %{ "daddu $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ daddu(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_imm(mRegP dst, mRegP src1, immL src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "daddiu $dst, $src1, $src2 #@addP_reg_imm" %} + ins_encode %{ + Register src1 = $src1$$Register; + long src2 = $src2$$constant; + Register dst = $dst$$Register; + + if(Assembler::is_simm16(src2)) { + __ daddiu(dst, src1, src2); + } else { + __ set64(AT, src2); + __ daddu(dst, src1, AT); + } + %} + ins_pipe( ialu_regI_imm16 ); +%} + +// Add Long Register with Register +instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (AddL src1 src2)); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) +%{ + match(Set dst (AddL src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + int src2_imm = $src2$$constant; + + __ daddiu(dst_reg, src1_reg, src2_imm); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) +%{ + match(Set dst (AddL (ConvI2L src1) src2)); + + 
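+  // The (ConvI2L ...) variants here and below consume the int register
+  // directly: the 32-bit ALU forms (addu32/addiu32, etc.) keep ints
+  // sign-extended to 64 bits, so no separate sign-extension step is needed
+  // before the 64-bit daddiu/daddu.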
format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + int src2_imm = $src2$$constant; + + __ daddiu(dst_reg, src1_reg, src2_imm); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ + match(Set dst (AddL (ConvI2L src1) src2)); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ + match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ + match(Set dst (AddL src1 (ConvI2L src2))); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +//----------Subtraction Instructions------------------------------------------- +// Integer Subtraction Instructions +instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(100); + + format %{ "subu $dst, $src1, $src2 #@subI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ subu32(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(80); + + format %{ "subu $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ addiu32(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ + match(Set dst (SubI zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negI_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ subu32(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ + match(Set dst (SubL zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negL_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ subu(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(80); + + format %{ "subu $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ daddiu(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Subtract Long Register with Register. 
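+// Note on the *_immI_M32767_32768 / *_immL_M32767_32768 rules above: MIPS has
+// no subtract-immediate, so the subtraction is folded into addiu32/daddiu with
+// the negated constant; the asymmetric range implied by the operand name
+// (presumably -32767..32768) keeps that negated value inside the signed
+// 16-bit immediate field.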
+instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{
+  match(Set dst (SubL src1 src2));
+  ins_cost(100);
+  format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %}
+  ins_encode %{
+    Register dst = as_Register($dst$$reg);
+    Register src1 = as_Register($src1$$reg);
+    Register src2 = as_Register($src2$$reg);
+
+    __ subu(dst, src1, src2);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{
+  match(Set dst (SubL src1 (ConvI2L src2)));
+  ins_cost(100);
+  format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %}
+  ins_encode %{
+    Register dst = as_Register($dst$$reg);
+    Register src1 = as_Register($src1$$reg);
+    Register src2 = as_Register($src2$$reg);
+
+    __ subu(dst, src1, src2);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{
+  match(Set dst (SubL (ConvI2L src1) src2));
+  ins_cost(200);
+  format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %}
+  ins_encode %{
+    Register dst = as_Register($dst$$reg);
+    Register src1 = as_Register($src1$$reg);
+    Register src2 = as_Register($src2$$reg);
+
+    __ subu(dst, src1, src2);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{
+  match(Set dst (SubL (ConvI2L src1) (ConvI2L src2)));
+  ins_cost(200);
+  format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %}
+  ins_encode %{
+    Register dst = as_Register($dst$$reg);
+    Register src1 = as_Register($src1$$reg);
+    Register src2 = as_Register($src2$$reg);
+
+    __ subu(dst, src1, src2);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+// Integer MOD with Register
+instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{
+  match(Set dst (ModI src1 src2));
+  ins_cost(300);
+  format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %}
+  ins_encode %{
+    Register dst = $dst$$Register;
+    Register src1 = $src1$$Register;
+    Register src2 = $src2$$Register;
+
+    //if (UseLEXT1) {
+    if (0) {
+      // Experiments show that gsmod is slower than div+mfhi.
+      // So I just disable it here.
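+      // Reminder for the fallback path below: the MIPS div instruction leaves
+      // the quotient in LO and the remainder in HI, so it is the mfhi(dst)
+      // that turns this into ModI rather than DivI.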
+ __ gsmod(dst, src1, src2); + } else { + __ div(src1, src2); + __ mfhi(dst); + } + %} + + //ins_pipe( ialu_mod ); + ins_pipe( ialu_regI_regI ); +%} + +instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (ModL src1 src2)); + format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsdmod(dst, op1, op2); + } else { + __ ddiv(op1, op2); + __ mfhi(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (MulI src1 src2)); + + ins_cost(300); + format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ mul(dst, src1, src2); + %} + ins_pipe( ialu_mult ); +%} + +instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ + match(Set dst (AddI (MulI src1 src2) src3)); + + ins_cost(999); + format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register src3 = $src3$$Register; + Register dst = $dst$$Register; + + __ mtlo(src3); + __ madd(src1, src2); + __ mflo(dst); + %} + ins_pipe( ialu_mult ); +%} + +instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (DivI src1 src2)); + + ins_cost(300); + format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + // In MIPS, div does not cause exception. + // We must trap an exception manually. + __ teq(R0, src2, 0x7); + + if (UseLEXT1) { + __ gsdiv(dst, src1, src2); + } else { + __ div(src1, src2); + + __ nop(); + __ nop(); + __ mflo(dst); + } + %} + ins_pipe( ialu_mod ); +%} + +instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ + match(Set dst (DivF src1 src2)); + + ins_cost(300); + format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + /* Here do we need to trap an exception manually ? */ + __ div_s(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ + match(Set dst (DivD src1 src2)); + + ins_cost(300); + format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + /* Here do we need to trap an exception manually ? 
*/ + __ div_d(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (MulL src1 src2)); + format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsdmult(dst, op1, op2); + } else { + __ dmult(op1, op2); + __ mflo(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ + match(Set dst (MulL src1 (ConvI2L src2))); + format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsdmult(dst, op1, op2); + } else { + __ dmult(op1, op2); + __ mflo(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (DivL src1 src2)); + format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsddiv(dst, op1, op2); + } else { + __ ddiv(op1, op2); + __ mflo(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (AddF src1 src2)); + format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ add_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (SubF src1 src2)); + format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sub_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} +instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (AddD src1 src2)); + format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ add_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (SubD src1 src2)); + format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sub_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negF_reg(regF dst, regF src) %{ + match(Set dst (NegF src)); + format %{ "negF $dst, $src @negF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ neg_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negD_reg(regD dst, regD src) %{ + match(Set dst (NegD src)); + format %{ "negD $dst, $src @negD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ neg_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +instruct 
mulF_reg_reg(regF dst, regF src1, regF src2) %{
+  match(Set dst (MulF src1 src2));
+  format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = $src1$$FloatRegister;
+    FloatRegister src2 = $src2$$FloatRegister;
+    FloatRegister dst = $dst$$FloatRegister;
+
+    __ mul_s(dst, src1, src2);
+  %}
+  ins_pipe( fpu_regF_regF );
+%}
+
+instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
+  match(Set dst (AddF (MulF src1 src2) src3));
+  // For compatibility reasons (e.g. on the Loongson platform), this pattern is
+  // effectively disabled by giving it a prohibitive cost.
+  ins_cost(44444);
+  format %{ "maddF $dst, $src1, $src2, $src3 @maddF_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = $src1$$FloatRegister;
+    FloatRegister src2 = $src2$$FloatRegister;
+    FloatRegister src3 = $src3$$FloatRegister;
+    FloatRegister dst = $dst$$FloatRegister;
+
+    __ madd_s(dst, src1, src2, src3);
+  %}
+  ins_pipe( fpu_regF_regF );
+%}
+
+// Multiply two double-precision floating point numbers
+instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
+  match(Set dst (MulD src1 src2));
+  format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = $src1$$FloatRegister;
+    FloatRegister src2 = $src2$$FloatRegister;
+    FloatRegister dst = $dst$$FloatRegister;
+
+    __ mul_d(dst, src1, src2);
+  %}
+  ins_pipe( fpu_regF_regF );
+%}
+
+instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
+  match(Set dst (AddD (MulD src1 src2) src3));
+  // For compatibility reasons (e.g. on the Loongson platform), this pattern is
+  // effectively disabled by giving it a prohibitive cost.
+  ins_cost(44444);
+  format %{ "maddD $dst, $src1, $src2, $src3 @maddD_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = $src1$$FloatRegister;
+    FloatRegister src2 = $src2$$FloatRegister;
+    FloatRegister src3 = $src3$$FloatRegister;
+    FloatRegister dst = $dst$$FloatRegister;
+
+    __ madd_d(dst, src1, src2, src3);
+  %}
+  ins_pipe( fpu_regF_regF );
+%}
+
+instruct absF_reg(regF dst, regF src) %{
+  match(Set dst (AbsF src));
+  ins_cost(100);
+  format %{ "absF $dst, $src @absF_reg" %}
+  ins_encode %{
+    FloatRegister src = as_FloatRegister($src$$reg);
+    FloatRegister dst = as_FloatRegister($dst$$reg);
+
+    __ abs_s(dst, src);
+  %}
+  ins_pipe( fpu_regF_regF );
+%}
+
+
+// intrinsics for math_native.
+// AbsD SqrtD CosD SinD TanD LogD Log10D + +instruct absD_reg(regD dst, regD src) %{ + match(Set dst (AbsD src)); + ins_cost(100); + format %{ "absD $dst, $src @absD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ abs_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtD_reg(regD dst, regD src) %{ + match(Set dst (SqrtD src)); + ins_cost(100); + format %{ "SqrtD $dst, $src @sqrtD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sqrt_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtF_reg(regF dst, regF src) %{ + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + ins_cost(100); + format %{ "SqrtF $dst, $src @sqrtF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sqrt_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} +//----------------------------------Logical Instructions---------------------- +//__________________________________Integer Logical Instructions------------- + +//And Instuctions +// And Register with Immediate +instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ move(AT, val); + __ andr(dst, src, AT); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ + match(Set dst (AndI src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ + match(Set dst (AndI src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int size = Assembler::is_int_mask($mask$$constant); + + __ ext(dst, src, 0, size); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ + match(Set dst (AndL src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int size = Assembler::is_jlong_mask($mask$$constant); + + __ dext(dst, src, 0, size); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ + match(Set dst (XorI src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ + match(Set dst (XorI src1 M1)); + predicate(UseLEXT3); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ gsorn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL 
src1, immI_M1 M1) %{ + match(Set dst (XorI (ConvL2I src1) M1)); + predicate(UseLEXT3); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ gsorn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ + match(Set dst (XorL src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* +instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ + match(Set dst (XorL src1 M1)); + predicate(UseLEXT3); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ gsorn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI mask (LoadB mem))); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_lmask" %} + ins_encode(load_UB_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadB mem) mask)); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_rmask" %} + ins_encode(load_UB_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ andr(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI src1 (XorI src2 M1))); + predicate(UseLEXT3); + + format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI src1 (XorI src2 M1))); + predicate(UseLEXT3); + + format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI (XorI src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI (XorI src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +// And Long Register with Register +instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst 
(AndL src1 src2)); + format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ andr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ + match(Set dst (AndL src1 (ConvI2L src2))); + format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ andr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ + match(Set dst (AndL src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ + match(Set dst (ConvL2I (AndL src1 src2))); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* +instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (AndL src1 (XorL src2 M1))); + predicate(UseLEXT3); + + format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +/* +instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (OrL src1 (XorL src2 M1))); + predicate(UseLEXT3); + + format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +/* +instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (AndL (XorL src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +/* +instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (OrL (XorL src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ + match(Set dst (AndL dst M8)); + ins_cost(60); + + format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 0, 3); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ + match(Set dst (AndL dst M5)); + ins_cost(60); + + format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} + ins_encode %{ + 
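+    // The andL_Reg_immL_M* rules implement AND with these particular negative
+    // constants by zeroing a bit field: dins(dst, R0, pos, size) inserts size
+    // zero bits starting at pos.  Here -5 is ~4, so clearing just bit 2
+    // (pos 2, size 1) is equivalent; likewise -8 clears bits 0..2, -7 bits
+    // 1..2, -4 bits 0..1 and -121 (= ~120) bits 3..6.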
Register dst = $dst$$Register; + + __ dins(dst, R0, 2, 1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ + match(Set dst (AndL dst M7)); + ins_cost(60); + + format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 1, 2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ + match(Set dst (AndL dst M4)); + ins_cost(60); + + format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 0, 2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ + match(Set dst (AndL dst M121)); + ins_cost(60); + + format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 3, 4); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Or Long Register with Register +instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (OrL src1 src2)); + format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; + + __ orr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ + match(Set dst (OrL (CastP2X src1) src2)); + format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; + + __ orr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor Long Register with Register +instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (XorL src1 src2)); + format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ xorr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left by 8-bit immediate +instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ sll(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ sll(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ + match(Set dst (AndI (LShiftI src shift) mask)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ sll(dst, src, 16); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); + + format %{ "andi $dst, $src, 7\t# @land7_2_s" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ andi(dst, src, 7); + %} 
+ ins_pipe(ialu_regI_regI); +%} + +// Shift Left by 16, followed by Arithmetic Shift Right by 16. +// This idiom is used by the compiler for the i2s bytecode. +instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); + + format %{ "i2s $dst, $src\t# @i2s" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ seh(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + +// Shift Left by 24, followed by Arithmetic Shift Right by 24. +// This idiom is used by the compiler for the i2b bytecode. +instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) +%{ + match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); + + format %{ "i2b $dst, $src\t# @i2b" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ seb(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + + +instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ sll(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Shift Left by variable amount in register +instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shamt = $shift$$Register; + __ sllv(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + + +// Shift Left Long +instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + if (__ is_simm(shamt, 5)) + __ dsll(dst_reg, src_reg, shamt); + else { + int sa = Assembler::low(shamt, 6); + if (sa < 32) { + __ dsll(dst_reg, src_reg, sa); + } else { + __ dsll32(dst_reg, src_reg, sa - 32); + } + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ + match(Set dst (LShiftL (ConvI2L src) shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + if (__ is_simm(shamt, 5)) + __ dsll(dst_reg, src_reg, shamt); + else { + int sa = Assembler::low(shamt, 6); + if (sa < 32) { + __ dsll(dst_reg, src_reg, sa); + } else { + __ dsll32(dst_reg, src_reg, sa - 32); + } + } + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left Long +instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ dsllv(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long +instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = ($shift$$constant & 0x3f); + 
if (__ is_simm(shamt, 5)) + __ dsra(dst_reg, src_reg, shamt); + else { + int sa = Assembler::low(shamt, 6); + if (sa < 32) { + __ dsra(dst_reg, src_reg, sa); + } else { + __ dsra32(dst_reg, src_reg, sa - 32); + } + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (RShiftL src shift))); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsra32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long arithmetically +instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ dsrav(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long logically +instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(100); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ dsrlv(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ + match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); + ins_cost(80); + format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dext(dst_reg, src_reg, shamt, 31); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (URShiftL src shift))); + predicate(n->in(1)->in(2)->get_int() > 32); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = 
as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor Instructions +// Xor Register with Register +instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (XorI src1 src2)); + + format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ xorr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Or Instructions +instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} + ins_encode %{ + __ ori($dst$$Register, $src1$$Register, $src2$$constant); + %} + + ins_pipe( ialu_regI_regI ); +%} +// Or Register with Register +instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ + match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); + predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); + + format %{ "rotr $dst, $src, 1 ...\n\t" + "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int rshift = $rshift$$constant; + + __ rotr(dst, src, 1); + if (rshift - 1) { + __ srl(dst, dst, rshift - 1); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ + match(Set dst (OrI src1 (CastP2X src2))); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right by 8-bit immediate +instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ + match(Set dst (URShiftI src shift)); + //effect(KILL cr); + + format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + + __ srl(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ + match(Set dst (AndI (URShiftI src shift) mask)); + + format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int pos = $shift$$constant; + int size = Assembler::is_int_mask($mask$$constant); + + __ ext(dst, src, pos, size); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 
lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); + + ins_cost(100); + format %{ "rotr $dst, $dst, $rshift #@rolI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + int sa = $rshift$$constant; + + __ rotr(dst, dst, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr32(dst, src, sa - 32); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotr(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr32(dst, src, sa - 32); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right +instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (URShiftI src shift)); + + format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ srlv(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} + ins_encode 
%{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + __ sra(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ srav(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +//----------Convert Int to Boolean--------------------------------------------- + +instruct convI2B(mRegI dst, mRegI src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convI2B $dst, $src @ convI2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ daddiu(dst, R0, 1); + __ movz(dst, R0, src); + } else { + __ move(AT, src); + __ daddiu(dst, R0, 1); + __ movz(dst, R0, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct convI2L_reg( mRegL dst, mRegI src) %{ + match(Set dst (ConvI2L src)); + + ins_cost(100); + format %{ "SLL $dst, $src @ convI2L_reg\t" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if(dst != src) __ sll(dst, src, 0); + %} + ins_pipe( ialu_regL_regL ); +%} + + +instruct convL2I_reg( mRegI dst, mRegL src ) %{ + match(Set dst (ConvL2I src)); + + format %{ "MOV $dst, $src @ convL2I_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ sll(dst, src, 0); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ + match(Set dst (ConvI2L (ConvL2I src))); + + format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ sll(dst, src, 0); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct convL2D_reg( regD dst, mRegL src ) %{ + match(Set dst (ConvL2D src)); + format %{ "convL2D $dst, $src @ convL2D_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ dmtc1(src, dst); + __ cvt_d_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convD2L_reg_fast( mRegL dst, regD src ) %{ + match(Set dst (ConvD2L src)); + ins_cost(150); + format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + Label Done; + + __ trunc_l_d(F30, src); + // max_long: 0x7fffffffffffffff + // __ set64(AT, 0x7fffffffffffffff); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(dst, F30); + + __ bne(dst, AT, Done); + __ delayed()->mtc1(R0, F30); + + __ cvt_d_w(F30, F30); + __ c_ult_d(src, F30); + __ bc1f(Done); + __ delayed()->daddiu(T9, R0, -1); + + __ c_un_d(src, src); //NaN? + __ subu(dst, T9, AT); + __ movt(dst, R0); + + __ bind(Done); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convD2L_reg_slow( mRegL dst, regD src ) %{ + match(Set dst (ConvD2L src)); + ins_cost(250); + format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + Label L; + + __ c_un_d(src, src); //NaN? 
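+ // c_un_d sets the FP condition flag when src is unordered (NaN); bc1t then branches to L with dst already cleared by the move in the branch delay slot, so a NaN input converts to 0.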
+ __ bc1t(L); + __ delayed(); + __ move(dst, R0); + + __ trunc_l_d(F30, src); + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->dmfc1(dst, F30); + + __ mov_d(F12, src); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); + __ move(dst, V0); + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2I_reg_fast( mRegI dst, regF src ) %{ + match(Set dst (ConvF2I src)); + ins_cost(150); + format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} + ins_encode %{ + Register dreg = $dst$$Register; + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ trunc_w_s(F30, fval); + __ move(AT, 0x7fffffff); + __ mfc1(dreg, F30); + __ c_un_s(fval, fval); //NaN? + __ movt(dreg, R0); + + __ bne(AT, dreg, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, fval); + __ andr(AT, AT, T9); + + __ movn(dreg, T9, AT); + + __ bind(L); + + %} + + ins_pipe( pipe_slow ); +%} + + + +instruct convF2I_reg_slow( mRegI dst, regF src ) %{ + match(Set dst (ConvF2I src)); + ins_cost(250); + format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} + ins_encode %{ + Register dreg = $dst$$Register; + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ c_un_s(fval, fval); //NaN? + __ bc1t(L); + __ delayed(); + __ move(dreg, R0); + + __ trunc_w_s(F30, fval); + + /* Call SharedRuntime::f2i() to do a valid conversion */ + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->mfc1(dreg, F30); + + __ mov_s(F12, fval); + + // This bug was found when running ezDS's control-panel. + // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 + // + // An integer array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. + // V0 is corrupted during call_VM_leaf(), and should be preserved. + // + __ push(fval); + if(dreg != V0) { + __ push(V0); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); + if(dreg != V0) { + __ move(dreg, V0); + __ pop(V0); + } + __ pop(fval); + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2L_reg_fast( mRegL dst, regF src ) %{ + match(Set dst (ConvF2L src)); + ins_cost(150); + format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} + ins_encode %{ + Register dreg = $dst$$Register; + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ trunc_l_s(F30, fval); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(dreg, F30); + __ c_un_s(fval, fval); //NaN? + __ movt(dreg, R0); + + __ bne(AT, dreg, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, fval); + __ andr(AT, AT, T9); + + __ dsll32(T9, T9, 0); + __ movn(dreg, T9, AT); + + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2L_reg_slow( mRegL dst, regF src ) %{ + match(Set dst (ConvF2L src)); + ins_cost(250); + format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ c_un_s(fval, fval); //NaN? 
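+ // Slow path: a NaN input takes the branch below and yields 0; otherwise, after trunc_l_s, the invalid-operation cause bit (0x10000) in the FCSR is checked and out-of-range values fall back to SharedRuntime::f2l.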
+ __ bc1t(L); + __ delayed(); + __ move(dst, R0); + + __ trunc_l_s(F30, fval); + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->dmfc1(dst, F30); + + __ mov_s(F12, fval); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); + __ move(dst, V0); + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convL2F_reg( regF dst, mRegL src ) %{ + match(Set dst (ConvL2F src)); + format %{ "convl2f $dst, $src @ convL2F_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + Register src = as_Register($src$$reg); + Label L; + + __ dmtc1(src, dst); + __ cvt_s_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convI2F_reg( regF dst, mRegI src ) %{ + match(Set dst (ConvI2F src)); + format %{ "convi2f $dst, $src @ convI2F_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + + __ mtc1(src, dst); + __ cvt_s_w(dst, dst); + %} + + ins_pipe( fpu_regF_regF ); +%} + +instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ + match(Set dst (CmpLTMask p zero)); + ins_cost(100); + + format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} + ins_encode %{ + Register src = $p$$Register; + Register dst = $dst$$Register; + + __ sra(dst, src, 31); + %} + ins_pipe( pipe_slow ); +%} + + +instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ + match(Set dst (CmpLTMask p q)); + ins_cost(400); + + format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} + ins_encode %{ + Register p = $p$$Register; + Register q = $q$$Register; + Register dst = $dst$$Register; + + __ slt(dst, p, q); + __ subu(dst, R0, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct convP2B(mRegI dst, mRegP src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convP2B $dst, $src @ convP2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ daddiu(dst, R0, 1); + __ movz(dst, R0, src); + } else { + __ move(AT, src); + __ daddiu(dst, R0, 1); + __ movz(dst, R0, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + + +instruct convI2D_reg_reg(regD dst, mRegI src) %{ + match(Set dst (ConvI2D src)); + format %{ "conI2D $dst, $src @convI2D_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + __ mtc1(src, dst); + __ cvt_d_w(dst, dst); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convF2D_reg_reg(regD dst, regF src) %{ + match(Set dst (ConvF2D src)); + format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ cvt_d_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convD2F_reg_reg(regF dst, regD src) %{ + match(Set dst (ConvD2F src)); + format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ cvt_s_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
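+// The fast variant truncates inline and repairs the overflow/NaN results with compares and branches; the slow variant checks the FCSR invalid-operation cause bit and calls SharedRuntime::d2i for out-of-range inputs.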
+instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ + match(Set dst (ConvD2I src)); + + ins_cost(150); + format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} + + ins_encode %{ + FloatRegister src = $src$$FloatRegister; + Register dst = $dst$$Register; + + Label Done; + + __ trunc_w_d(F30, src); + // max_int: 2147483647 + __ move(AT, 0x7fffffff); + __ mfc1(dst, F30); + + __ bne(dst, AT, Done); + __ delayed()->mtc1(R0, F30); + + __ cvt_d_w(F30, F30); + __ c_ult_d(src, F30); + __ bc1f(Done); + __ delayed()->addiu(T9, R0, -1); + + __ c_un_d(src, src); //NaN? + __ subu32(dst, T9, AT); + __ movt(dst, R0); + + __ bind(Done); + %} + ins_pipe( pipe_slow ); +%} + + +instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ + match(Set dst (ConvD2I src)); + + ins_cost(250); + format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} + + ins_encode %{ + FloatRegister src = $src$$FloatRegister; + Register dst = $dst$$Register; + Label L; + + __ trunc_w_d(F30, src); + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->mfc1(dst, F30); + + __ mov_d(F12, src); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); + __ move(dst, V0); + __ bind(L); + + %} + ins_pipe( pipe_slow ); +%} + +// Convert oop pointer into compressed form +instruct encodeHeapOop(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop $dst,$src" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ encode_heap_oop(dst, src); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} + ins_encode %{ + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && + n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + + __ decode_heap_oop(d, s); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_heap_oop_not_null(d, s); + } else { + __ decode_heap_oop_not_null(d); + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ + match(Set dst (EncodePKlass src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} + ins_encode %{ + __ encode_klass_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ + match(Set dst (DecodeNKlass src)); + format %{ "decode_heap_klass_not_null $dst,$src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_klass_not_null(d, s); + } else { + __ decode_klass_not_null(d); + } + 
%} + ins_pipe( ialu_regL_regL ); +%} + +//FIXME +instruct tlsLoadP(mRegP dst) %{ + match(Set dst (ThreadLocal)); + + ins_cost(0); + format %{ " get_thread in $dst #@tlsLoadP" %} + ins_encode %{ + Register dst = $dst$$Register; +#ifdef OPT_THREAD + __ move(dst, TREG); +#else + __ get_thread(dst); +#endif + %} + + ins_pipe( ialu_loadI ); +%} + + +instruct checkCastPP( mRegP dst ) %{ + match(Set dst (CheckCastPP dst)); + + format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} + ins_encode( /*empty encoding*/ ); + ins_pipe( empty ); +%} + +instruct castPP(mRegP dst) +%{ + match(Set dst (CastPP dst)); + + size(0); + format %{ "# castPP of $dst" %} + ins_encode(/* empty encoding */); + ins_pipe(empty); +%} + +instruct castII( mRegI dst ) %{ + match(Set dst (CastII dst)); + format %{ "#castII of $dst empty encoding" %} + ins_encode( /*empty encoding*/ ); + ins_cost(0); + ins_pipe( empty ); +%} + +// Return Instruction +// Remove the return address & jump to it. +instruct Ret() %{ + match(Return); + format %{ "RET #@Ret" %} + + ins_encode %{ + __ jr(RA); + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); +%} + +/* +// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. +instruct jumpXtnd(mRegL switch_val) %{ + match(Jump switch_val); + + ins_cost(350); + + format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" + "jr T9\n\t" + "nop" %} + ins_encode %{ + Register table_base = $constanttablebase; + int con_offset = $constantoffset; + Register switch_reg = $switch_val$$Register; + + if (UseLEXT1) { + if (Assembler::is_simm(con_offset, 8)) { + __ gsldx(T9, table_base, switch_reg, con_offset); + } else if (Assembler::is_simm16(con_offset)) { + __ daddu(T9, table_base, switch_reg); + __ ld(T9, T9, con_offset); + } else { + __ move(T9, con_offset); + __ daddu(AT, table_base, switch_reg); + __ gsldx(T9, AT, T9, 0); + } + } else { + if (Assembler::is_simm16(con_offset)) { + __ daddu(T9, table_base, switch_reg); + __ ld(T9, T9, con_offset); + } else { + __ move(T9, con_offset); + __ daddu(AT, table_base, switch_reg); + __ daddu(AT, T9, AT); + __ ld(T9, AT, 0); + } + } + + __ jr(T9); + __ delayed()->nop(); + + %} + ins_pipe(pipe_jump); +%} +*/ + + +// Tail Jump; remove the return address; jump to target. +// TailCall above leaves the return address around. +// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). +// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a +// "restore" before this instruction (in Epilogue), we need to materialize it +// in %i0. +//FIXME +instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ + match( TailJump jump_target ex_oop ); + ins_cost(200); + format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} + ins_encode %{ + Register target = $jump_target$$Register; + + // V0, V1 are indicated in: + // [stubGenerator_mips.cpp] generate_forward_exception() + // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() + // + Register oop = $ex_oop$$Register; + Register exception_oop = V0; + Register exception_pc = V1; + + __ move(exception_pc, RA); + __ move(exception_oop, oop); + + __ jr(target); + __ delayed()->nop(); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Procedure Call/Return Instructions +// Call Java Static Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. 
+instruct CallStaticJavaDirect(method meth) %{ + match(CallStaticJava); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,static #@CallStaticJavaDirect " %} + ins_encode( Java_Static_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +// Call Java Dynamic Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. +instruct CallDynamicJavaDirect(method meth) %{ + match(CallDynamicJava); + effect(USE meth); + + ins_cost(300); + format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" + "CallDynamic @ CallDynamicJavaDirect" %} + ins_encode( Java_Dynamic_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +instruct CallLeafNoFPDirect(method meth) %{ + match(CallLeafNoFP); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF_NOFP,runtime " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +// Prefetch instructions. + +instruct prefetchrNTA( memory mem ) %{ + match(PrefetchRead mem); + ins_cost(125); + + format %{ "pref $mem\t# Prefetch into non-temporal cache for read @ prefetchrNTA" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + } else { + __ move(AT, as_Register(base)); + } + if( Assembler::is_simm16(disp) ) { + __ daddiu(AT, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + } + __ pref(0, AT, 0); //hint: 0:load + %} + ins_pipe(pipe_slow); +%} + +instruct prefetchwNTA( memory mem ) %{ + match(PrefetchWrite mem); + ins_cost(125); + format %{ "pref $mem\t# Prefetch to non-temporal cache for write @ prefetchwNTA" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + } else { + __ move(AT, as_Register(base)); + } + if( Assembler::is_simm16(disp) ) { + __ daddiu(AT, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + } + __ pref(1, AT, 0); //hint: 1:store + %} + ins_pipe(pipe_slow); +%} + +// Prefetch instructions for allocation. 
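+// On this port the allocation prefetch is implemented as a discarded byte load (lb, or gslbx when UseLEXT1 is enabled) into R0 rather than a pref hint.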
+ +instruct prefetchAllocNTA( memory mem ) %{ + match(PrefetchAllocation mem); + ins_cost(125); + format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + Register dst = R0; + + if ( index != 0 ) { + if ( Assembler::is_simm16(disp) ) { + if (UseLEXT1) { + if (scale == 0) { + __ gslbx(dst, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gslbx(dst, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ lb(dst, AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gslbx(dst, AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ lb(dst, AT, 0); + } + } + } else { + if ( Assembler::is_simm16(disp) ) { + __ lb(dst, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gslbx(dst, as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ lb(dst, AT, 0); + } + } + } + %} + ins_pipe(pipe_slow); +%} + + +// Call runtime without safepoint +instruct CallLeafDirect(method meth) %{ + match(CallLeaf); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +// Load Char (16bit unsigned) +instruct loadUS(mRegI dst, memory mem) %{ + match(Set dst (LoadUS mem)); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadC" %} + ins_encode(load_C_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct loadUS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} + ins_encode(load_C_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Store Char (16bit unsigned) +instruct storeC(memory mem, mRegI src) %{ + match(Set mem (StoreC mem src)); + + ins_cost(125); + format %{ "storeC $src, $mem @ storeC" %} + ins_encode(store_C_reg_enc(mem, src)); + ins_pipe( ialu_loadI ); +%} + +instruct storeC_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreC mem zero)); + + ins_cost(125); + format %{ "storeC $zero, $mem @ storeC_0" %} + ins_encode(store_C0_enc(mem)); + ins_pipe( ialu_loadI ); +%} + + +instruct loadConF_immF_0(regF dst, immF_0 zero) %{ + match(Set dst zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConF_immF_0\n"%} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + + __ mtc1(R0, dst); + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConF(regF dst, immF src) %{ + match(Set dst src); + ins_cost(125); + + format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm16(con_offset)) { + __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ set64(AT, con_offset); + if (UseLEXT1) { + __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); + } else { + __ daddu(AT, $constanttablebase, AT); + __ lwc1($dst$$FloatRegister, AT, 0); + } + } + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConD_immD_0(regD dst, immD_0 zero) %{ + match(Set dst 
zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConD_immD_0"%} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ dmtc1(R0, dst); + %} + ins_pipe( fpu_loadF ); +%} + +instruct loadConD(regD dst, immD src) %{ + match(Set dst src); + ins_cost(125); + + format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm16(con_offset)) { + __ ldc1($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ set64(AT, con_offset); + if (UseLEXT1) { + __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); + } else { + __ daddu(AT, $constanttablebase, AT); + __ ldc1($dst$$FloatRegister, AT, 0); + } + } + %} + ins_pipe( fpu_loadF ); +%} + +// Store register Float value (it is faster than store from FPU register) +instruct storeF_reg( memory mem, regF src) %{ + match(Set mem (StoreF mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeF_reg" %} + ins_encode(store_F_reg_enc(mem, src)); + ins_pipe( fpu_storeF ); +%} + +instruct storeF_immF_0( memory mem, immF_0 zero) %{ + match(Set mem (StoreF mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if ( scale == 0 ) { + __ gsswx(R0, as_Register(base), as_Register(index), disp); + } else { + __ dsll(T9, as_Register(index), scale); + __ gsswx(R0, as_Register(base), T9, disp); + } + } else if ( Assembler::is_simm16(disp) ) { + if ( scale == 0 ) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(T9, as_Register(index), scale); + __ daddu(AT, as_Register(base), T9); + } + __ sw(R0, AT, disp); + } else { + if ( scale == 0 ) { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + __ gsswx(R0, as_Register(base), AT, 0); + } else { + __ dsll(T9, as_Register(index), scale); + __ move(AT, disp); + __ daddu(AT, AT, T9); + __ gsswx(R0, as_Register(base), AT, 0); + } + } + } else { //not use loongson isa + if(scale != 0) { + __ dsll(T9, as_Register(index), scale); + __ daddu(AT, as_Register(base), T9); + } else { + __ daddu(AT, as_Register(base), as_Register(index)); + } + if( Assembler::is_simm16(disp) ) { + __ sw(R0, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sw(R0, AT, 0); + } + } + } else { //index is 0 + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + __ sw(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + __ gsswx(R0, as_Register(base), T9, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sw(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sw(R0, AT, 0); + } + } + } + %} + ins_pipe( ialu_storeI ); +%} + +// Load Double +instruct loadD(regD dst, memory mem) %{ + match(Set dst (LoadD mem)); + + ins_cost(150); + format %{ "loadD $dst, $mem #@loadD" %} + ins_encode(load_D_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Double - UNaligned +instruct loadD_unaligned(regD dst, memory mem ) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(250); + // FIXME: Need more effective ldl/ldr + format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} + ins_encode(load_D_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct storeD_reg( memory mem, regD src) %{ + match(Set 
mem (StoreD mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeD_reg" %} + ins_encode(store_D_reg_enc(mem, src)); + ins_pipe( fpu_storeF ); +%} + +instruct storeD_immD_0( memory mem, immD_0 zero) %{ + match(Set mem (StoreD mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + __ mtc1(R0, F30); + __ cvt_d_w(F30, F30); + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gssdxc1(F30, as_Register(base), as_Register(index), disp); + } else { + __ dsll(T9, as_Register(index), scale); + __ gssdxc1(F30, as_Register(base), T9, disp); + } + } else if ( Assembler::is_simm16(disp) ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + __ sdc1(F30, AT, disp); + } else { + __ dsll(T9, as_Register(index), scale); + __ daddu(AT, as_Register(base), T9); + __ sdc1(F30, AT, disp); + } + } else { + if (scale == 0) { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + __ gssdxc1(F30, as_Register(base), AT, 0); + } else { + __ move(T9, disp); + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, AT, T9); + __ gssdxc1(F30, as_Register(base), AT, 0); + } + } + } else { // not use loongson isa + if(scale != 0) { + __ dsll(T9, as_Register(index), scale); + __ daddu(AT, as_Register(base), T9); + } else { + __ daddu(AT, as_Register(base), as_Register(index)); + } + if( Assembler::is_simm16(disp) ) { + __ sdc1(F30, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sdc1(F30, AT, 0); + } + } + } else {// index is 0 + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + __ sdc1(F30, as_Register(base), disp); + } else { + __ move(T9, disp); + __ gssdxc1(F30, as_Register(base), T9, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sdc1(F30, as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sdc1(F30, AT, 0); + } + } + } + %} + ins_pipe( ialu_storeI ); +%} + +instruct loadSSI(mRegI dst, stackSlotI src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "lw $dst, $src\t# int stk @ loadSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); + __ lw($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSI(stackSlotI dst, mRegI src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sw $dst, $src\t# int stk @ storeSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); + __ sw($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSL(mRegL dst, stackSlotL src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld $dst, $src\t# long stk @ loadSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); + __ ld($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSL(stackSlotL dst, mRegL src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sd $dst, $src\t# long stk @ storeSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); + __ sd($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSP(mRegP dst, stackSlotP src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} + ins_encode %{ + guarantee( 
Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); + __ ld($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSP(stackSlotP dst, mRegP src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); + __ sd($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSF(regF dst, stackSlotF src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); + __ lwc1($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSF(stackSlotF dst, regF src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); + __ swc1($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +// Use the same format since predicate() can not be used here. +instruct loadSSD(regD dst, stackSlotD src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); + __ ldc1($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSD(stackSlotD dst, regD src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); + __ sdc1($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +instruct cmpFastLock( FlagsReg cr, mRegP object, s0_RegP box, mRegI tmp, mRegP scr) %{ + match( Set cr (FastLock object box) ); + effect( TEMP tmp, TEMP scr, USE_KILL box ); + ins_cost(300); + format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register); + __ move($cr$$Register, AT); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +instruct cmpFastUnlock( FlagsReg cr, mRegP object, s0_RegP box, mRegP tmp ) %{ + match( Set cr (FastUnlock object box) ); + effect( TEMP tmp, USE_KILL box ); + ins_cost(300); + format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register); + __ move($cr$$Register, AT); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +// Store CMS card-mark Immediate +instruct storeImmCM(memory mem, immI8 src) %{ + match(Set mem (StoreCM mem src)); + + ins_cost(150); + format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} +// opcode(0xC6); + ins_encode(store_B_immI_enc_sync(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Die now +instruct ShouldNotReachHere( ) +%{ + match(Halt); + ins_cost(300); + + // Use the following format syntax + format %{ "ILLTRAP ;#@ShouldNotReachHere" %} + ins_encode %{ + // Here we should emit illtrap ! 
+ + __ stop("in ShoudNotReachHere"); + + %} + ins_pipe( pipe_jump ); +%} + +instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) +%{ + predicate(Universe::narrow_oop_shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + int disp = $mem$$disp; + + __ daddiu(dst, base, disp); + %} + ins_pipe( ialu_regI_imm16 ); +%} + +instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + Register index = as_Register($mem$$index); + int scale = $mem$$scale; + int disp = $mem$$disp; + + if (scale == 0) { + __ daddu(AT, base, index); + __ daddiu(dst, AT, disp); + } else { + __ dsll(AT, index, scale); + __ daddu(AT, base, AT); + __ daddiu(dst, AT, disp); + } + %} + + ins_pipe( ialu_regI_imm16 ); +%} + +instruct leaPIdxScale(mRegP dst, indIndexScale mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + Register index = as_Register($mem$$index); + int scale = $mem$$scale; + + if (scale == 0) { + __ daddu(dst, base, index); + } else { + __ dsll(AT, index, scale); + __ daddu(dst, base, AT); + } + %} + + ins_pipe( ialu_regI_imm16 ); +%} + + +// ============================================================================ +// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass +// array for an instance of the superklass. Set a hidden internal cache on a +// hit (cache is checked with exposed code in gen_subtype_check()). Return +// NZ for a miss or zero for a hit. The encoding ALSO sets flags. +instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ + match(Set result (PartialSubtypeCheck sub super)); + effect(KILL tmp); + ins_cost(1100); // slightly larger than the next version + format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} + + ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); + ins_pipe( pipe_slow ); +%} + +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. + +instruct storePConditional( memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr ) %{ + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + + format %{ "CMPXCHG $heap_top_ptr, $newval\t# (ptr) @storePConditional " + "If $oldval == $heap_top_ptr then store $newval into $heap_top_ptr" %} + ins_encode%{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); + + int index = $heap_top_ptr$$index; + int scale = $heap_top_ptr$$scale; + int disp = $heap_top_ptr$$disp; + + guarantee(Assembler::is_simm16(disp), ""); + + if( index != 0 ) { + __ stop("in storePConditional: index != 0"); + } else { + __ cmpxchg(newval, addr, oldval); + __ move($cr$$Register, AT); + } + %} + ins_pipe( long_memory_op ); +%} + +// Conditional-store of an int value. +// AT flag is set on success, reset otherwise. 
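+// Implemented below with an ll/sc retry loop: AT ends up as 0xFF when the store succeeds and as 0 when the loaded value differs from $oldval.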
+instruct storeIConditional( memory mem, mRegI oldval, mRegI newval, FlagsReg cr ) %{ + match(Set cr (StoreIConditional mem (Binary oldval newval))); +// effect(KILL oldval); + format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} + + ins_encode %{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Address addr(as_Register($mem$$base), $mem$$disp); + Label again, failure; + + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + guarantee(Assembler::is_simm16(disp), ""); + + if( index != 0 ) { + __ stop("in storeIConditional: index != 0"); + } else { + __ bind(again); + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) __ sync(); + __ ll(AT, addr); + __ bne(AT, oldval, failure); + __ delayed()->addu(AT, R0, R0); + + __ addu(AT, newval, R0); + __ sc(AT, addr); + __ beq(AT, R0, again); + __ delayed()->addiu(AT, R0, 0xFF); + __ bind(failure); + __ sync(); + + __ move($cr$$Register, AT); + } +%} + + ins_pipe( long_memory_op ); +%} + +// Conditional-store of a long value. +// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. +instruct storeLConditional(memory mem, t2RegL oldval, mRegL newval, FlagsReg cr ) +%{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); + effect(KILL oldval); + + format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} + ins_encode%{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Address addr(as_Register($mem$$base), $mem$$disp); + + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + guarantee(Assembler::is_simm16(disp), ""); + + if( index != 0 ) { + __ stop("in storeIConditional: index != 0"); + } else { + __ cmpxchg(newval, addr, oldval); + __ move($cr$$Register, AT); + } + %} + ins_pipe( long_memory_op ); +%} + +// Implement LoadPLocked. Must be ordered against changes of the memory location +// by storePConditional. 
+instruct loadPLocked(mRegP dst, memory mem) %{ + match(Set dst (LoadPLocked mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "ld $dst, $mem #@loadPLocked\n\t" + "sync" %} + size(12); + ins_encode (load_P_enc_ac(dst, mem)); + ins_pipe( ialu_loadI ); +%} + + +instruct compareAndSwapI( mRegI res, mRegP mem_ptr, mS2RegI oldval, mRegI newval) %{ + match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); + effect(KILL oldval); +// match(CompareAndSwapI mem_ptr (Binary oldval newval)); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL\n\t" + "MOV $res, 1 @ compareAndSwapI\n\t" + "BNE AT, R0 @ compareAndSwapI\n\t" + "MOV $res, 0 @ compareAndSwapI\n" + "L:" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + Label L; + + __ cmpxchg32(newval, addr, oldval); + __ move(res, AT); + %} + ins_pipe( long_memory_op ); +%} + +instruct compareAndSwapL( mRegI res, mRegP mem_ptr, s2RegL oldval, mRegL newval) %{ + predicate(VM_Version::supports_cx8()); + match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); + effect(KILL oldval); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI\n\t" + "MOV $res, 1 @ compareAndSwapI\n\t" + "BNE AT, R0 @ compareAndSwapI\n\t" + "MOV $res, 0 @ compareAndSwapI\n" + "L:" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + Label L; + + __ cmpxchg(newval, addr, oldval); + __ move(res, AT); + %} + ins_pipe( long_memory_op ); +%} + +//FIXME: +instruct compareAndSwapP( mRegI res, mRegP mem_ptr, s2_RegP oldval, mRegP newval) %{ + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + effect(KILL oldval); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP\n\t" + "MOV $res, AT @ compareAndSwapP\n\t" + "L:" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + Label L; + + __ cmpxchg(newval, addr, oldval); + __ move(res, AT); + %} + ins_pipe( long_memory_op ); +%} + +instruct compareAndSwapN( mRegI res, mRegP mem_ptr, t2_RegN oldval, mRegN newval) %{ + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + effect(KILL oldval); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN\n\t" + "MOV $res, AT @ compareAndSwapN\n\t" + "L:" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + Label L; + + // cmpxchg32 is implemented with ll/sc, which will do sign extension. + // Thus, we should extend oldval's sign for correct comparision. + // + __ sll(oldval, oldval, 0); + + __ cmpxchg32(newval, addr, oldval); + __ move(res, AT); + %} + ins_pipe( long_memory_op ); +%} + +//----------Max and Min-------------------------------------------------------- +// Min Instructions +//// +// *** Min and Max using the conditional move are slower than the +// *** branch version on a Pentium III. +// // Conditional move for min +//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ +// effect( USE_DEF op2, USE op1, USE cr ); +// format %{ "CMOVlt $op2,$op1\t! 
min" %} +// opcode(0x4C,0x0F); +// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); +// ins_pipe( pipe_cmov_reg ); +//%} +// +//// Min Register with Register (P6 version) +//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ +// predicate(VM_Version::supports_cmov() ); +// match(Set op2 (MinI op1 op2)); +// ins_cost(200); +// expand %{ +// eFlagsReg cr; +// compI_eReg(cr,op1,op2); +// cmovI_reg_lt(op2,op1,cr); +// %} +//%} + +// Min Register with Register (generic version) +instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MinI dst src)); + //effect(KILL flags); + ins_cost(80); + + format %{ "MIN $dst, $src @minI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, src, dst); + __ movn(dst, src, AT); + + %} + + ins_pipe( pipe_slow ); +%} + +// Max Register with Register +// *** Min and Max using the conditional move are slower than the +// *** branch version on a Pentium III. +// // Conditional move for max +//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ +// effect( USE_DEF op2, USE op1, USE cr ); +// format %{ "CMOVgt $op2,$op1\t! max" %} +// opcode(0x4F,0x0F); +// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); +// ins_pipe( pipe_cmov_reg ); +//%} +// +// // Max Register with Register (P6 version) +//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ +// predicate(VM_Version::supports_cmov() ); +// match(Set op2 (MaxI op1 op2)); +// ins_cost(200); +// expand %{ +// eFlagsReg cr; +// compI_eReg(cr,op1,op2); +// cmovI_reg_gt(op2,op1,cr); +// %} +//%} + +// Max Register with Register (generic version) +instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MaxI dst src)); + ins_cost(80); + + format %{ "MAX $dst, $src @maxI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, dst, src); + __ movn(dst, src, AT); + + %} + + ins_pipe( pipe_slow ); +%} + +instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ + match(Set dst (MaxI dst zero)); + ins_cost(50); + + format %{ "MAX $dst, 0 @maxI_Reg_zero" %} + + ins_encode %{ + Register dst = $dst$$Register; + + __ slt(AT, dst, R0); + __ movn(dst, R0, AT); + + %} + + ins_pipe( pipe_slow ); +%} + +instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL src mask)); + + format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ dext(dst, src, 0, 32); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) +%{ + match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); + + format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + if (src1 == dst) { + __ dinsu(dst, src2, 32, 32); + } else if (src2 == dst) { + __ dsll32(dst, dst, 0); + __ dins(dst, src1, 0, 32); + } else { + __ dext(dst, src1, 0, 32); + __ dinsu(dst, src2, 32, 32); + } + %} + ins_pipe(ialu_regI_regI); +%} + +// Zero-extend convert int to long +instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L src) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ dext(dst, src, 0, 32); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct 
convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ dext(dst, src, 0, 32); + %} + ins_pipe(ialu_regI_regI); +%} + +// Match loading integer and casting it to unsigned int in long register. +// LoadI + ConvI2L + AndL 0xffffffff. +instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + + format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL mask (ConvI2L (LoadI mem)))); + + format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + + +// ============================================================================ +// Safepoint Instruction +instruct safePoint_poll_reg(mRegP poll) %{ + match(SafePoint poll); + predicate(false); + effect(USE poll); + + ins_cost(125); + format %{ "Safepoint @ [$poll] : poll for GC @ safePoint_poll_reg" %} + + ins_encode %{ + Register poll_reg = $poll$$Register; + + __ block_comment("Safepoint:"); + __ relocate(relocInfo::poll_type); + __ lw(AT, poll_reg, 0); + %} + + ins_pipe( ialu_storeI ); +%} + +instruct safePoint_poll() %{ + match(SafePoint); + + ins_cost(105); + format %{ "poll for GC @ safePoint_poll" %} + + ins_encode %{ + __ block_comment("Safepoint:"); + __ set64(T9, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_type); + __ lw(AT, T9, 0); + %} + + ins_pipe( ialu_storeI ); +%} + +//----------Arithmetic Conversion Instructions--------------------------------- + +instruct roundFloat_nop(regF dst) +%{ + match(Set dst (RoundFloat dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +instruct roundDouble_nop(regD dst) +%{ + match(Set dst (RoundDouble dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +//---------- Zeros Count Instructions ------------------------------------------ +// CountLeadingZerosINode CountTrailingZerosINode +instruct countLeadingZerosI(mRegI dst, mRegI src) %{ + predicate(UseCountLeadingZerosInstructionMIPS64); + match(Set dst (CountLeadingZerosI src)); + + format %{ "clz $dst, $src\t# count leading zeros (int)" %} + ins_encode %{ + __ clz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countLeadingZerosL(mRegI dst, mRegL src) %{ + predicate(UseCountLeadingZerosInstructionMIPS64); + match(Set dst (CountLeadingZerosL src)); + + format %{ "dclz $dst, $src\t# count leading zeros (long)" %} + ins_encode %{ + __ dclz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosI(mRegI dst, mRegI src) %{ + predicate(UseCountTrailingZerosInstructionMIPS64); + match(Set dst (CountTrailingZerosI src)); + + format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} + ins_encode %{ + // ctz and dctz is gs instructions. 
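+    // For reference, a portable equivalent of what ctz computes (a sketch, not
+    // emitted code; the loop form is only illustrative):
+    //
+    //   int ctz32(uint32_t x) {
+    //     if (x == 0) return 32;
+    //     int n = 0;
+    //     while ((x & 1) == 0) { x >>= 1; n++; }
+    //     return n;
+    //   }
+    //
+    // dctz in countTrailingZerosL below is the 64-bit counterpart.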
+ __ ctz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosL(mRegI dst, mRegL src) %{ + predicate(UseCountTrailingZerosInstructionMIPS64); + match(Set dst (CountTrailingZerosL src)); + + format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} + ins_encode %{ + __ dctz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// ====================VECTOR INSTRUCTIONS===================================== + +// Load vectors (8 bytes long) +instruct loadV8(vecD dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "load $dst, $mem\t! load vector (8 bytes)" %} + ins_encode(load_D_enc(dst, mem)); + ins_pipe( fpu_loadF ); +%} + +// Store vectors (8 bytes long) +instruct storeV8(memory mem, vecD src) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "store $mem, $src\t! store vector (8 bytes)" %} + ins_encode(store_D_reg_enc(mem, src)); + ins_pipe( fpu_storeF ); +%} + +instruct Repl8B_DSP(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8 && UseLEXT3); + match(Set dst (ReplicateB src)); + ins_cost(100); + format %{ "replv_ob AT, $src\n\t" + "dmtc1 AT, $dst\t! replicate8B" %} + ins_encode %{ + __ replv_ob(AT, $src$$Register); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(140); + format %{ "move AT, $src\n\t" + "dins AT, AT, 8, 8\n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate8B" %} + ins_encode %{ + __ move(AT, $src$$Register); + __ dins(AT, AT, 8, 8); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_imm_DSP(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 8 && UseLEXT3); + match(Set dst (ReplicateB con)); + ins_cost(110); + format %{ "repl_ob AT, [$con]\n\t" + "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} + ins_encode %{ + int val = $con$$constant; + __ repl_ob(AT, val); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + ins_cost(150); + format %{ "move AT, [$con]\n\t" + "dins AT, AT, 8, 8\n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} + ins_encode %{ + __ move(AT, $con$$constant); + __ dins(AT, AT, 8, 8); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_zero(vecD dst, immI_0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB zero)); + ins_cost(90); + format %{ "dmtc1 R0, $dst\t! replicate8B zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB M1)); + ins_cost(80); + format %{ "dmtc1 -1, $dst\t! 
replicate8B -1" %} + ins_encode %{ + __ nor(AT, R0, R0); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_DSP(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 4 && UseLEXT3); + match(Set dst (ReplicateS src)); + ins_cost(100); + format %{ "replv_qh AT, $src\n\t" + "dmtc1 AT, $dst\t! replicate4S" %} + ins_encode %{ + __ replv_qh(AT, $src$$Register); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + ins_cost(120); + format %{ "move AT, $src \n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate4S" %} + ins_encode %{ + __ move(AT, $src$$Register); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_imm_DSP(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 4 && UseLEXT3); + match(Set dst (ReplicateS con)); + ins_cost(100); + format %{ "repl_qh AT, [$con]\n\t" + "dmtc1 AT, $dst\t! replicate4S($con)" %} + ins_encode %{ + int val = $con$$constant; + if ( Assembler::is_simm(val, 10)) { + //repl_qh supports 10 bits immediate + __ repl_qh(AT, val); + } else { + __ li32(AT, val); + __ replv_qh(AT, AT); + } + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + ins_cost(110); + format %{ "move AT, [$con]\n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate4S($con)" %} + ins_encode %{ + __ move(AT, $con$$constant); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_zero(vecD dst, immI_0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS zero)); + format %{ "dmtc1 R0, $dst\t! replicate4S zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS M1)); + format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} + ins_encode %{ + __ nor(AT, R0, R0); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar to be vector +instruct Repl2I(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + format %{ "dins AT, $src, 0, 32\n\t" + "dinsu AT, $src, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate2I" %} + ins_encode %{ + __ dins(AT, $src$$Register, 0, 32); + __ dinsu(AT, $src$$Register, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. +instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + effect(KILL tmp); + format %{ "li32 AT, [$con], 32\n\t" + "dinsu AT, AT\n\t" + "dmtc1 AT, $dst\t! 
replicate2I($con)" %} + ins_encode %{ + int val = $con$$constant; + __ li32(AT, val); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar zero to be vector +instruct Repl2I_zero(vecD dst, immI_0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI zero)); + format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar -1 to be vector +instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI M1)); + format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} + ins_encode %{ + __ nor(AT, R0, R0); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate float (4 byte) scalar to be vector +instruct Repl2F(vecD dst, regF src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} + ins_encode %{ + __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate float (4 byte) scalar zero to be vector +instruct Repl2F_zero(vecD dst, immF_0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF zero)); + format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + + +// ====================VECTOR ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- + +// Floats vector add +// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. +instruct vadd2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF dst src)); + format %{ "add.ps $dst,$src\t! add packed2F" %} + ins_encode %{ + __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} + ins_encode %{ + __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// --------------------------------- SUB -------------------------------------- + +// Floats vector sub +instruct vsub2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVF dst src)); + format %{ "sub.ps $dst,$src\t! sub packed2F" %} + ins_encode %{ + __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// --------------------------------- MUL -------------------------------------- + +// Floats vector mul +instruct vmul2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF dst src)); + format %{ "mul.ps $dst, $src\t! mul packed2F" %} + ins_encode %{ + __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF src1 src2)); + format %{ "mul.ps $dst, $src1, $src2\t! 
mul packed2F" %} + ins_encode %{ + __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// --------------------------------- DIV -------------------------------------- +// MIPS do not have div.ps + +// --------------------------------- MADD -------------------------------------- +// Floats vector madd +//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ +// predicate(n->as_Vector()->length() == 2); +// match(Set dst (AddVF (MulVF src1 src2) src3)); +// ins_cost(50); +// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} +// ins_encode %{ +// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); +// %} +// ins_pipe( fpu_regF_regF ); +//%} + + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +// +// peepmatch ( root_instr_name [preceeding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] ); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. +// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == EAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(eRegI dst, eRegI src) %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceeded by register-register move +// peepmatch ( incI_eReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// Implementation no longer uses movX instructions since +// machine-independent system no longer uses CopyX nodes. 
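+//
+// A hypothetical MIPS-flavoured rule of the same shape (not enabled here; the
+// instruct names are placeholders and not necessarily defined in this file)
+// would fold a register move into the add that follows it:
+//
+// peephole %{
+//   peepmatch ( addI_Reg_Reg movI_Reg_Reg );
+//   peepconstraint ( 0.dst == 1.dst );
+//   peepreplace ( addI_Reg_Reg( 0.dst 1.src 0.src ) );
+// %}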
+// +// peephole %{ +// peepmatch ( incI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( decI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addI_eReg_imm movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addP_eReg_imm movP ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); +// %} + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, eRegI src) %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(eRegI dst, memory mem) %{ +// match(Set dst (LoadI mem)); +// %} +// +//peephole %{ +// peepmatch ( loadI storeI ); +// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); +// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); +//%} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. + diff --git a/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp b/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp new file mode 100644 index 00000000000..e1f7cd944df --- /dev/null +++ b/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp @@ -0,0 +1,1829 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "code/codeCache.hpp" +#include "compiler/disassembler.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_mips.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" + +#include + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +void NativeInstruction::wrote(int offset) { + ICache::invalidate_word(addr_at(offset)); +} + +void NativeInstruction::set_long_at(int offset, long i) { + address addr = addr_at(offset); + *(long*)addr = i; + ICache::invalidate_range(addr, 8); +} + +static int illegal_instruction_bits = 0; + +int NativeInstruction::illegal_instruction() { + if (illegal_instruction_bits == 0) { + ResourceMark rm; + char buf[40]; + CodeBuffer cbuf((address)&buf[0], 20); + MacroAssembler* a = new MacroAssembler(&cbuf); + address ia = a->pc(); + a->brk(11); + int bits = *(int*)ia; + illegal_instruction_bits = bits; + } + return illegal_instruction_bits; +} + +bool NativeInstruction::is_int_branch() { + switch(Assembler::opcode(insn_word())) { + case Assembler::beq_op: + case Assembler::beql_op: + case Assembler::bgtz_op: + case Assembler::bgtzl_op: + case Assembler::blez_op: + case Assembler::blezl_op: + case Assembler::bne_op: + case Assembler::bnel_op: + return true; + case Assembler::regimm_op: + switch(Assembler::rt(insn_word())) { + case Assembler::bgez_op: + case Assembler::bgezal_op: + case Assembler::bgezall_op: + case Assembler::bgezl_op: + case Assembler::bltz_op: + case Assembler::bltzal_op: + case Assembler::bltzall_op: + case Assembler::bltzl_op: + return true; + } + } + + return false; +} + +bool NativeInstruction::is_float_branch() { + if (!is_op(Assembler::cop1_op) || + !is_rs((Register)Assembler::bc1f_op)) return false; + + switch(Assembler::rt(insn_word())) { + case Assembler::bcf_op: + case Assembler::bcfl_op: + case Assembler::bct_op: + case Assembler::bctl_op: + return true; + } + + return false; +} + + +void NativeCall::verify() { + // make sure code pattern is actually a call instruction + + // nop + // nop + // nop + // nop + // jal target + // nop + if ( is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_op(int_at(16), Assembler::jal_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return; + } + + // jal targe + // nop + if ( is_op(int_at(0), Assembler::jal_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + return; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) && + is_special_op(int_at(24), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 
+ //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + // FIXME: why add jr_op here? + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //daddiu dst, R0, imm16 + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return; + } + + if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) + return; + + fatal("not a call"); +} + +address NativeCall::target_addr_for_insn() const { + // jal target + // nop + if ( is_op(int_at(0), Assembler::jal_op) && + nativeInstruction_at(addr_at(4))->is_nop()) { + int instr_index = int_at(0) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // nop + // nop + // nop + // nop + // jal target + // nop + if ( nativeInstruction_at(addr_at(0))->is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_op(int_at(16), Assembler::jal_op) && + nativeInstruction_at(addr_at(20))->is_nop()) { + int instr_index = int_at(16) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), 
Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), + (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff)); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ld dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ld_op) ) { + + address dest = (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return (address)Assembler::merge( (intptr_t)(0), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //daddiu dst, R0, imm16 + //nop + //nop <-- optional + //nop <-- optional + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop <-- optional + //nop <-- optional + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //nop + //nop <-- optional + //nop <-- optional + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); + tty->print_cr("======= Start decoding at addr = " 
INTPTR_FORMAT " =======", p2i(addr_at(0))); + Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); + tty->print_cr("======= End of decoding ======="); + fatal("not a call"); + return NULL; +} + +// Extract call destination from a NativeCall. The call might use a trampoline stub. +address NativeCall::destination() const { + address addr = (address)this; + address destination = target_addr_for_insn(); + // Do we use a trampoline stub for this call? + // Trampoline stubs are located behind the main code. + if (destination > addr) { + // Filter out recursive method invocation (call to verified/unverified entry point). + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. + assert(cb && cb->is_nmethod(), "sanity"); + nmethod *nm = (nmethod *)cb; + NativeInstruction* ni = nativeInstruction_at(addr); + if (nm->stub_contains(destination) && ni->is_trampoline_call()) { + // Yes we do, so get the destination from the trampoline stub. + const address trampoline_stub_addr = destination; + destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + } + return destination; +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. +// +// Used in the runtime linkage of calls; see class CompiledIC. +// +// Add parameter assert_lock to switch off assertion +// during code generation, where no patching lock is needed. +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || + (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + address addr_call = addr_at(0); + assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + // Patch the constant in the call's trampoline stub. + if (MacroAssembler::reachable_from_cache()) { + set_destination(dest); + } else { + address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); + assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); + nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); + } +} + + +address NativeCall::get_trampoline() { + address call_addr = addr_at(0); + + CodeBlob *code = CodeCache::find_blob(call_addr); + assert(code != NULL, "Could not find the containing code blob"); + + // If the codeBlob is not a nmethod, this is because we get here from the + // CodeBlob constructor, which is called within the nmethod constructor. 
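+  // Roughly, the resolution path handled by destination() above (a sketch; the
+  // layout is simplified):
+  //
+  //   call site (jal/jalr) ---> trampoline stub behind the main code
+  //   trampoline stub      ---> real destination, kept as a patchable constant
+  //
+  // so set_destination_mt_safe() only has to rewrite the trampoline's constant
+  // when the target is not reachable from the code cache.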
+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); +} + +// manual implementation of GSSQ +// +// 00000001200009c0 : +// 1200009c0: 0085202d daddu a0, a0, a1 +// 1200009c4: e8860027 gssq a2, a3, 0(a0) +// 1200009c8: 03e00008 jr ra +// 1200009cc: 00000000 nop +// +typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); + +static int *buf; + +static atomic_store128_ptr get_atomic_store128_func() { + assert(UseLEXT1, "UseLEXT1 must be true"); + static atomic_store128_ptr p = NULL; + if (p != NULL) + return p; + + buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + buf[0] = 0x0085202d; + buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ + buf[2] = 0x03e00008; + buf[3] = 0; + + asm("sync"); + p = (atomic_store128_ptr)buf; + return p; +} + +void NativeCall::patch_on_jal_only(address dst) { + long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint jal_inst = (Assembler::jal_op << 26) | dest; + set_int_at(0, jal_inst); + ICache::invalidate_range(addr_at(0), 4); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::patch_on_trampoline(address dest) { + assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); + jlong dst = (jlong) dest; + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ld dst, dst, imm16 + if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { + dst += (dst & 0x8000) << 1; + set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); + + ICache::invalidate_range(addr_at(0), 24); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::patch_on_jal_gs(address dst) { + long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint jal_inst = (Assembler::jal_op << 26) | dest; + set_int_at(16, jal_inst); + ICache::invalidate_range(addr_at(16), 4); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::patch_on_jal(address dst) { + patch_on_jal_gs(dst); +} + +void NativeCall::patch_on_jalr_gs(address dst) { + patch_set48_gs(dst); +} + +void NativeCall::patch_on_jalr(address dst) { + patch_set48(dst); +} + +void NativeCall::patch_set48_gs(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + int count = 0; + int insts[4] = {0, 0, 0, 0}; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); + count += 1; + } else { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); + count += 1; + if (Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + if 
(Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + insts[count] = 0; + count++; + } + + guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); + atomic_store128_ptr func = get_atomic_store128_func(); + (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeCall::patch_set32_gs(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + int insts[2] = {0, 0}; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + //daddiu(d, R0, value); + //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); + count += 1; + } else { + //lui(d, split_low(value >> 16)); + //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 2) { + //nop(); + //set_int_at(count << 2, 0); + insts[count] = 0; + count++; + } + + long inst = insts[1]; + inst = inst << 32; + inst = inst + insts[0]; + + set_long_at(0, inst); +} + +void NativeCall::patch_set48(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + //daddiu(d, R0, value); + set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + //lui(d, split_low(value >> 16)); + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + //ori(d, R0, julong(value) >> 16); + set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | 
Assembler::split_low(julong(value) >> 16)); + count += 1; + //dsll(d, d, 16); + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + //lui(d, value >> 32); + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); + count += 1; + //ori(d, d, split_low(value >> 16)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + //dsll(d, d, 16); + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + //nop(); + set_int_at(count << 2, 0); + count++; + } + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeCall::patch_set32(address dest) { + patch_set32_gs(dest); +} + +void NativeCall::set_destination(address dest) { + OrderAccess::fence(); + + // li64 + if (is_special_op(int_at(16), Assembler::dsll_op)) { + int first_word = int_at(0); + set_int_at(0, 0x1000ffff); /* .1: b .1 */ + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); + set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); + set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); + ICache::invalidate_range(addr_at(0), 24); + } else if (is_op(int_at(16), Assembler::jal_op)) { + if (UseLEXT1) { + patch_on_jal_gs(dest); + } else { + patch_on_jal(dest); + } + } else if (is_op(int_at(0), Assembler::jal_op)) { + patch_on_jal_only(dest); + } else if (is_special_op(int_at(16), Assembler::jalr_op)) { + if (UseLEXT1) { + patch_on_jalr_gs(dest); + } else { + patch_on_jalr(dest); + } + } else if (is_special_op(int_at(8), Assembler::jalr_op)) { + guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); + if (UseLEXT1) { + patch_set32_gs(dest); + } else { + patch_set32(dest); + } + ICache::invalidate_range(addr_at(0), 8); + } else { + fatal("not a call"); + } +} + +void NativeCall::print() { + tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, + p2i(instruction_address()), p2i(destination())); +} + +// Inserts a native call instruction at a given pc +void NativeCall::insert(address code_pos, address entry) { + NativeCall *call = nativeCall_at(code_pos); + CodeBuffer cb(call->addr_at(0), instruction_size); + MacroAssembler masm(&cb); +#define __ masm. + __ li48(T9, (long)entry); + __ jalr (); + __ delayed()->nop(); +#undef __ + + ICache::invalidate_range(call->addr_at(0), instruction_size); +} + +// MT-safe patching of a call instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. 
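+//
+// A sketch of that idea as used by set_destination() above for the li64 form:
+//
+//   1. overwrite the first word with "b ." (0x1000ffff) so a concurrently
+//      executing thread spins there instead of seeing a half-patched sequence;
+//   2. patch the remaining immediate words of the li64;
+//   3. write the final first word, releasing any spinning thread;
+//   4. invalidate the icache for the whole 24-byte range.
+//
+// replace_mt_safe() itself is left Unimplemented() in this port.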
+void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { + Unimplemented(); +} + +//------------------------------------------------------------------- + +void NativeMovConstReg::verify() { + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop()) { + return; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + return; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + return; + } + + fatal("not a mov reg, imm64/imm48"); +} + +void NativeMovConstReg::print() { + tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, + p2i(instruction_address()), data()); +} + +intptr_t NativeMovConstReg::data() const { + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + + return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), + (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff)); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + + return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + 
//nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return Assembler::merge( (intptr_t)(0), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0, + (intptr_t)0); + } else { + return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + fatal("not a mov reg, imm64/imm48"); + return 0; // unreachable +} + +void NativeMovConstReg::patch_set48(intptr_t x) { + jlong value = (jlong) x; + int rt_reg = (int_at(0) & (0x1f << 16)); + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + //daddiu(d, R0, value); + set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + //lui(d, split_low(value >> 16)); + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); + count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + if (Assembler::split_low(value)) { + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); + 
count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + set_int_at(count << 2, 0); + count++; + } +} + +void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { + // li64 or li48 + if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { + set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); + set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); + } else { + patch_set48(x); + } + + ICache::invalidate_range(addr_at(0), 24); + + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. + CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); + nmethod* nm = blob->as_nmethod_or_null(); + if (nm != NULL) { + o = o ? o : x; + RelocIterator iter(nm, instruction_address(), next_instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop* oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(o); + break; + } else if (iter.type() == relocInfo::metadata_type) { + Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)o; + break; + } + } + } +} + +//------------------------------------------------------------------- + +int NativeMovRegMem::offset() const{ + if (is_immediate()) + return (short)(int_at(instruction_offset)&0xffff); + else + return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); +} + +void NativeMovRegMem::set_offset(int x) { + if (is_immediate()) { + assert(Assembler::is_simm16(x), "just check"); + set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); + if (is_64ldst()) { + assert(Assembler::is_simm16(x+4), "just check"); + set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); + } + } else { + set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); + } + ICache::invalidate_range(addr_at(0), 8); +} + +void NativeMovRegMem::verify() { + int offset = 0; + + if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { + + if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { + fatal ("not a mov [reg+offs], reg instruction"); + } + + offset += 12; + } + + switch(Assembler::opcode(int_at(offset))) { + case Assembler::lb_op: + case Assembler::lbu_op: + case Assembler::lh_op: + case Assembler::lhu_op: + case Assembler::lw_op: + case Assembler::lwu_op: + case Assembler::ld_op: + case Assembler::lwc1_op: + case Assembler::ldc1_op: + case Assembler::sb_op: + case Assembler::sh_op: + case Assembler::sw_op: + case Assembler::sd_op: + case Assembler::swc1_op: + case Assembler::sdc1_op: + break; + default: + fatal ("not a mov [reg+offs], reg instruction"); + } +} + + +void NativeMovRegMem::print() { + tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); +} + +bool NativeInstruction::is_sigill_zombie_not_entrant() { + return uint_at(0) == 
NativeIllegalInstruction::instruction_code; +} + +void NativeIllegalInstruction::insert(address code_pos) { + *(juint*)code_pos = instruction_code; + ICache::invalidate_range(code_pos, instruction_size); +} + +void NativeJump::verify() { + assert(((NativeInstruction *)this)->is_jump() || + ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); +} + +void NativeJump::patch_set48_gs(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + int insts[4] = {0, 0, 0, 0}; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); + count += 1; + } else { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); + count += 1; + if (Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + if (Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + insts[count] = 0; + count++; + } + + guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); + atomic_store128_ptr func = get_atomic_store128_func(); + (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeJump::patch_set48(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + if (Assembler::split_low(value)) { + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); + count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + if (Assembler::split_low(value)) { + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | 
rt_reg | Assembler::split_low(value)); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + set_int_at(count << 2, 0); + count++; + } + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeJump::patch_on_j_only(address dst) { + long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint j_inst = (Assembler::j_op << 26) | dest; + set_int_at(0, j_inst); + ICache::invalidate_range(addr_at(0), 4); + } else { + ShouldNotReachHere(); + } +} + + +void NativeJump::patch_on_j_gs(address dst) { + long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint j_inst = (Assembler::j_op << 26) | dest; + set_int_at(16, j_inst); + ICache::invalidate_range(addr_at(16), 4); + } else { + ShouldNotReachHere(); + } +} + +void NativeJump::patch_on_j(address dst) { + patch_on_j_gs(dst); +} + +void NativeJump::patch_on_jr_gs(address dst) { + patch_set48_gs(dst); + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeJump::patch_on_jr(address dst) { + patch_set48(dst); + ICache::invalidate_range(addr_at(0), 16); +} + + +void NativeJump::set_jump_destination(address dest) { + OrderAccess::fence(); + + if (is_short()) { + assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); + set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); + ICache::invalidate_range(addr_at(0), 4); + } else if (is_b_far()) { + int offset = dest - addr_at(12); + set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); + set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); + } else { + if (is_op(int_at(16), Assembler::j_op)) { + if (UseLEXT1) { + patch_on_j_gs(dest); + } else { + patch_on_j(dest); + } + } else if (is_op(int_at(0), Assembler::j_op)) { + patch_on_j_only(dest); + } else if (is_special_op(int_at(16), Assembler::jr_op)) { + if (UseLEXT1) { + //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); + //patch_on_jr_gs(dest); + patch_on_jr(dest); + } else { + patch_on_jr(dest); + } + } else { + fatal("not a jump"); + } + } +} + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + CodeBuffer cb(code_pos, instruction_size); + MacroAssembler masm(&cb); +#define __ masm. + if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { + __ b(entry); + __ delayed()->nop(); + } else { + // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. 
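+    // Sketch of what the sequence below computes (illustration only, with made-up
+    // addresses; not generated code): bgezal with rs == R0 always branches and
+    // leaves RA == address_of(bgezal) + 8. So with code_pos == 0x120003000 and
+    // entry == 0x120103000, offset == 0x100000; the lui/ori pair builds
+    // offset - 8 == 0xffff8 in T9, and daddu(T9, T9, RA) gives
+    // 0xffff8 + 0x120003008 == 0x120103000 == entry, which jr T9 then reaches.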
+ int offset = entry - code_pos; + + Label L; + __ bgezal(R0, L); + __ delayed()->lui(T9, (offset - 8) >> 16); + __ bind(L); + __ ori(T9, T9, (offset - 8) & 0xffff); + __ daddu(T9, T9, RA); + __ jr(T9); + __ delayed()->nop(); + } + +#undef __ + + ICache::invalidate_range(code_pos, instruction_size); +} + +bool NativeJump::is_b_far() { +// +// 0x000000556809f198: daddu at, ra, zero +// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 +// +// 0x000000556809f1a0: nop +// 0x000000556809f1a4: lui t9, 0xfffffffd +// 0x000000556809f1a8: ori t9, t9, 0x14dc +// 0x000000556809f1ac: daddu t9, t9, ra +// 0x000000556809f1b0: daddu ra, at, zero +// 0x000000556809f1b4: jr t9 +// 0x000000556809f1b8: nop +// ;; ImplicitNullCheckStub slow case +// 0x000000556809f1bc: lui t9, 0x55 +// + return is_op(int_at(12), Assembler::lui_op); +} + +address NativeJump::jump_destination() { + if ( is_short() ) { + return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; + } + // Assembler::merge() is not correct in MIPS_64! + // + // Example: + // hi16 = 0xfffd, + // lo16 = f7a4, + // + // offset=0xfffdf7a4 (Right) + // Assembler::merge = 0xfffcf7a4 (Wrong) + // + if ( is_b_far() ) { + int hi16 = int_at(12)&0xffff; + int low16 = int_at(16)&0xffff; + address target = addr_at(12) + (hi16 << 16) + low16; + return target; + } + + // nop + // nop + // nop + // nop + // j target + // nop + if ( nativeInstruction_at(addr_at(0))->is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_op(int_at(16), Assembler::j_op) && + nativeInstruction_at(addr_at(20))->is_nop()) { + int instr_index = int_at(16) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // j target + // nop + if ( is_op(int_at(0), Assembler::j_op) && + nativeInstruction_at(addr_at(4))->is_nop()) { + int instr_index = int_at(0) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), + (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff)); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( 
is_op(Assembler::ori_op) &&
+       is_special_op(int_at(4), Assembler::dsll_op) &&
+       nativeInstruction_at(addr_at(8))->is_nop() &&
+       nativeInstruction_at(addr_at(12))->is_nop()) {
+
+    return (address)Assembler::merge( (intptr_t)(0),
+                                      (intptr_t)(int_at(0) & 0xffff),
+                                      (intptr_t)0,
+                                      (intptr_t)0);
+  }
+
+  //daddiu dst, R0, imm16
+  //nop
+  //nop
+  //nop
+  if ( is_op(Assembler::daddiu_op) &&
+       nativeInstruction_at(addr_at(4))->is_nop() &&
+       nativeInstruction_at(addr_at(8))->is_nop() &&
+       nativeInstruction_at(addr_at(12))->is_nop() ) {
+
+    int sign = int_at(0) & 0x8000;
+    if (sign == 0) {
+      return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)0,
+                                        (intptr_t)0,
+                                        (intptr_t)0);
+    } else {
+      return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)(0xffff),
+                                        (intptr_t)(0xffff),
+                                        (intptr_t)(0xffff));
+    }
+  }
+
+  //lui dst, imm16
+  //ori dst, dst, imm16
+  //nop
+  //nop
+  if ( is_op(Assembler::lui_op) &&
+       is_op (int_at(4), Assembler::ori_op) &&
+       nativeInstruction_at(addr_at(8))->is_nop() &&
+       nativeInstruction_at(addr_at(12))->is_nop() ) {
+
+    int sign = int_at(0) & 0x8000;
+    if (sign == 0) {
+      return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff),
+                                        (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)0,
+                                        (intptr_t)0);
+    } else {
+      return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff),
+                                        (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)(0xffff),
+                                        (intptr_t)(0xffff));
+    }
+  }
+
+  //lui dst, imm16
+  //nop
+  //nop
+  //nop
+  if ( is_op(Assembler::lui_op) &&
+       nativeInstruction_at(addr_at(4))->is_nop() &&
+       nativeInstruction_at(addr_at(8))->is_nop() &&
+       nativeInstruction_at(addr_at(12))->is_nop() ) {
+
+    int sign = int_at(0) & 0x8000;
+    if (sign == 0) {
+      return (address)Assembler::merge( (intptr_t)0,
+                                        (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)0,
+                                        (intptr_t)0);
+    } else {
+      return (address)Assembler::merge( (intptr_t)0,
+                                        (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)(0xffff),
+                                        (intptr_t)(0xffff));
+    }
+  }
+
+  fatal("not a jump");
+  return NULL; // unreachable
+}
+
+// MT-safe patching of a long jump instruction sequence.
+// First patches the jump word of the sequence with a branch that spins in place
+// (a spinlock for any thread already executing the old code), then copies in the
+// leading words of the new instruction, and finally replaces the spinning branch
+// atomically with the last two words of the new sequence.
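+//
+// A sketch of the intended memory states for the 6-word jump site patched below
+// (byte offsets; "b .1" is the spin branch 0x1000fffb; illustration only):
+//
+//   initial:       [0..12] old set48 words   [16..20] old jr + delay slot
+//   after step 1:  [0..12] old set48 words   [16] b .1 (spin)   [20] old delay slot
+//   after step 2:  [0..12] new set48 words   [16] b .1 (spin)   [20] old delay slot
+//   after step 3:  [0..12] new set48 words   [16..20] new jr + delay slot  (one 64-bit store)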
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); + assert((int)instruction_size == (int)NativeCall::instruction_size, + "note::Runtime1::patch_code uses NativeCall::instruction_size"); + + // ensure 100% atomicity + guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); + + int *p = (int *)instr_addr; + int jr_word = p[4]; + + p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ + memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); + *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); +} + +// Must ensure atomicity +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); + + if (MacroAssembler::reachable_from_cache(dest)) { + CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.j(dest); + } else { + // We use an illegal instruction for marking a method as + // not_entrant or zombie + NativeIllegalInstruction::insert(verified_entry); + } + + ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); +} + +bool NativeInstruction::is_jump() +{ + if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) + return true; + if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far + return true; + if (is_op(int_at(12), Assembler::lui_op)) // original b_far + return true; + + // nop + // nop + // nop + // nop + // j target + // nop + if ( is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return true; + } + + if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + return true; + } + + // lui rd, imm(63...48); + // ori rd, rd, imm(47...32); + // dsll rd, rd, 16; + // ori rd, rd, imm(31...16); + // dsll rd, rd, 16; + // ori rd, rd, imm(15...0); + // jr rd + // nop + if (is_op(int_at(0), Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) && + is_special_op(int_at(24), Assembler::jr_op)) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if (is_op(int_at(0), Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && 
+ nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + return false; +} + +bool NativeInstruction::is_dtrace_trap() { + //return (*(int32_t*)this & 0xff) == 0xcc; + Unimplemented(); + return false; +} + +bool NativeInstruction::is_safepoint_poll() { + // + // 390 li T2, 0x0000000000400000 #@loadConP + // 394 sw [SP + #12], V1 # spill 9 + // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 + // + // 0x000000ffe5815130: lui t2, 0x40 + // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} + // ;*goto + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + + // Since there may be some spill instructions between the safePoint_poll and loadConP, + // we check the safepoint instruction like the this. + return is_op(Assembler::lw_op) && is_rt(AT); +} diff --git a/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp b/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp new file mode 100644 index 00000000000..13a4cb4ef1c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp @@ -0,0 +1,735 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP +#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP + +#include "asm/assembler.hpp" +#include "memory/allocation.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" +#include "utilities/top.hpp" + +// We have interfaces for the following instructions: +// - NativeInstruction +// - - NativeCall +// - - NativeMovConstReg +// - - NativeMovConstRegPatching +// - - NativeMovRegMem +// - - NativeMovRegMemPatching +// - - NativeJump +// - - NativeIllegalOpCode +// - - NativeGeneralJump +// - - NativeReturn +// - - NativeReturnX (return with argument) +// - - NativePushConst +// - - NativeTstRegMem + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. + +class NativeInstruction VALUE_OBJ_CLASS_SPEC { + friend class Relocation; + + public: + enum mips_specific_constants { + nop_instruction_code = 0, + nop_instruction_size = 4, + sync_instruction_code = 0xf + }; + + bool is_nop() { return long_at(0) == nop_instruction_code; } + bool is_sync() { return long_at(0) == sync_instruction_code; } + bool is_dtrace_trap(); + inline bool is_call(); + inline bool is_illegal(); + inline bool is_return(); + bool is_jump(); + inline bool is_cond_jump(); + bool is_safepoint_poll(); + + //mips has no instruction to generate a illegal instrucion exception + //we define ours: break 11 + static int illegal_instruction(); + + bool is_int_branch(); + bool is_float_branch(); + + inline bool is_trampoline_call(); + + //We use an illegal instruction for marking a method as not_entrant or zombie. + bool is_sigill_zombie_not_entrant(); + + protected: + address addr_at(int offset) const { return address(this) + offset; } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(BytesPerInstWord); } + address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } + + s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } + u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } + + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + + intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } + + oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + int long_at(int offset) const { return *(jint*)addr_at(offset); } + + + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } + void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } + void set_long_at(int offset, long i); + + int insn_word() const { return long_at(0); } + static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } + bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } + bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } + bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } + bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } + bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } + + static bool is_special_op (int insn, Assembler::special_ops op) { + return is_op(insn, 
Assembler::special_op) && Assembler::special(insn)==(int)op; + } + bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } + + void wrote(int offset); + + public: + + // unit test stuff + static void test() {} // override for testing + + inline friend NativeInstruction* nativeInstruction_at(address address); +}; + +inline NativeInstruction* nativeInstruction_at(address address) { + NativeInstruction* inst = (NativeInstruction*)address; +#ifdef ASSERT + //inst->verify(); +#endif + return inst; +} + +inline NativeCall* nativeCall_at(address address); +// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 +// instructions (used to manipulate inline caches, primitive & dll calls, etc.). +// MIPS has no call instruction with imm32/imm64. Usually, a call was done like this: +// 32 bits: +// lui rt, imm16 +// addiu rt, rt, imm16 +// jalr rt +// nop +// +// 64 bits: +// lui rd, imm(63...48); +// ori rd, rd, imm(47...32); +// dsll rd, rd, 16; +// ori rd, rd, imm(31...16); +// dsll rd, rd, 16; +// ori rd, rd, imm(15...0); +// jalr rd +// nop +// + +// we just consider the above for instruction as one call instruction +class NativeCall: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + instruction_size = 6 * BytesPerInstWord, + return_address_offset_short = 4 * BytesPerInstWord, + return_address_offset_long = 6 * BytesPerInstWord, + displacement_offset = 0 + }; + + address instruction_address() const { return addr_at(instruction_offset); } + + address next_instruction_address() const { + if (is_special_op(int_at(8), Assembler::jalr_op)) { + return addr_at(return_address_offset_short); + } else { + return addr_at(return_address_offset_long); + } + } + + address return_address() const { + return next_instruction_address(); + } + + address target_addr_for_insn() const; + address destination() const; + void set_destination(address dest); + + void patch_set48_gs(address dest); + void patch_set48(address dest); + + void patch_on_jalr_gs(address dest); + void patch_on_jalr(address dest); + + void patch_on_jal_gs(address dest); + void patch_on_jal(address dest); + + void patch_on_trampoline(address dest); + + void patch_on_jal_only(address dest); + + void patch_set32_gs(address dest); + void patch_set32(address dest); + + void verify_alignment() { } + void verify(); + void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address address); + inline friend NativeCall* nativeCall_before(address return_address); + + static bool is_call_at(address instr) { + return nativeInstruction_at(instr)->is_call(); + } + + static bool is_call_before(address return_address) { + return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); + } + + static bool is_call_to(address instr, address target) { + return nativeInstruction_at(instr)->is_call() && +nativeCall_at(instr)->destination() == target; + } + + // MT-safe patching of a call instruction. + static void insert(address code_pos, address entry); + + static void replace_mt_safe(address instr_addr, address code_buffer); + + // Similar to replace_mt_safe, but just changes the destination. The + // important thing is that free-running threads are able to execute + // this call instruction at all times. If the call is an immediate jal + // instruction we can simply rely on atomicity of 32-bit writes to + // make sure other threads will see no intermediate states. 
+ + // We cannot rely on locks here, since the free-running threads must run at + // full speed. + // + // Used in the runtime linkage of calls; see class CompiledIC. + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); +}; + +inline NativeCall* nativeCall_at(address address) { + NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + NativeCall* call = NULL; + if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { + call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); + } else { + call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); + } +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +class NativeMovConstReg: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + instruction_size = 4 * BytesPerInstWord, + next_instruction_offset = 4 * BytesPerInstWord, + }; + + int insn_word() const { return long_at(instruction_offset); } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + intptr_t data() const; + void set_data(intptr_t x, intptr_t o = 0); + + void patch_set48(intptr_t x); + + void verify(); + void print(); + + // unit test stuff + static void test() {} + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address address); + inline friend NativeMovConstReg* nativeMovConstReg_before(address address); +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovConstRegPatching: public NativeMovConstReg { + private: + friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { + NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + +// An interface for accessing/manipulating native moves of the form: +// lui AT, split_high(offset) +// addiu AT, split_low(offset) +// addu reg, reg, AT +// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 +// [lw/sw/lwc1/swc1 dest, reg, 4] +// or +// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset +// [lw/sw/lwc1/swc1 dest, reg, offset+4] +// +// Warning: These routines must be able to handle any instruction sequences +// that are generated as a result of the load/store byte,word,long +// macros. + +class NativeMovRegMem: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + hiword_offset = 4, + ldst_offset = 12, + immediate_size = 4, + ldst_size = 16 + }; + + //offset is less than 16 bits. 
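+  // In the immediate case the first word is not a lui, so the access is the short
+  // form shown above: a single load/store whose 16-bit displacement is encoded
+  // directly, possibly followed by a second lw/sw/lwc1/swc1 at offset + wordSize
+  // when one value is split over two accesses (see is_64ldst() below).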
+ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } + bool is_64ldst() const { + if (is_immediate()) { + return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && + (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); + } else { + return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && + (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); + } + } + + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { + return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); + } + + int offset() const; + + void set_offset(int x); + + void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } + + void verify(); + void print (); + + // unit test stuff + static void test() {} + + private: + inline friend NativeMovRegMem* nativeMovRegMem_at (address address); +}; + +inline NativeMovRegMem* nativeMovRegMem_at (address address) { + NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovRegMemPatching: public NativeMovRegMem { + private: + friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { + NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + + +// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional +// 32 bits: +// far jump: +// lui reg, split_high(addr) +// addiu reg, split_low(addr) +// jr reg +// nop +// or +// beq ZERO, ZERO, offset +// nop +// + +//64 bits: +// far jump: +// lui rd, imm(63...48); +// ori rd, rd, imm(47...32); +// dsll rd, rd, 16; +// ori rd, rd, imm(31...16); +// dsll rd, rd, 16; +// ori rd, rd, imm(15...0); +// jalr rd +// nop +// +class NativeJump: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + beq_opcode = 0x10000000,//000100|00000|00000|offset + b_mask = 0xffff0000, + short_size = 8, + instruction_size = 6 * BytesPerInstWord + }; + + bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } + bool is_b_far(); + address instruction_address() const { return addr_at(instruction_offset); } + address jump_destination(); + + void patch_set48_gs(address dest); + void patch_set48(address dest); + + void patch_on_jr_gs(address dest); + void patch_on_jr(address dest); + + void patch_on_j_gs(address dest); + void patch_on_j(address dest); + + void patch_on_j_only(address dest); + + void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + // Insertion of native jump instruction + static void insert(address code_pos, address entry) { Unimplemented(); } + // MT-safe insertion of native jump at verified method entry + static void check_verified_entry_alignment(address entry, address verified_entry) {} + static void patch_verified_entry(address entry, address verified_entry, address dest); + + void verify(); +}; + +inline NativeJump* nativeJump_at(address address) { + NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); + debug_only(jump->verify();) + return jump; +} + +class NativeGeneralJump: 
public NativeJump { + public: + // Creation + inline friend NativeGeneralJump* nativeGeneralJump_at(address address); + + // Insertion of native general jump instruction + static void insert_unconditional(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); +}; + +inline NativeGeneralJump* nativeGeneralJump_at(address address) { + NativeGeneralJump* jump = (NativeGeneralJump*)(address); + debug_only(jump->verify();) + return jump; +} + +class NativeIllegalInstruction: public NativeInstruction { +public: + enum mips_specific_constants { + instruction_code = 0x42000029, // mips reserved instruction + instruction_size = 4, + instruction_offset = 0, + next_instruction_offset = 4 + }; + + // Insert illegal opcode as specific address + static void insert(address code_pos); +}; + +// return instruction that does not pop values of the stack +// jr RA +// delay slot +class NativeReturn: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_size = 8, + instruction_offset = 0, + next_instruction_offset = 8 + }; +}; + + + + +class NativeCondJump; +inline NativeCondJump* nativeCondJump_at(address address); +class NativeCondJump: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_size = 16, + instruction_offset = 12, + next_instruction_offset = 20 + }; + + + int insn_word() const { return long_at(instruction_offset); } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + + // Creation + inline friend NativeCondJump* nativeCondJump_at(address address); + + address jump_destination() const { + return ::nativeCondJump_at(addr_at(12))->jump_destination(); + } + + void set_jump_destination(address dest) { + ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); + } + +}; + +inline NativeCondJump* nativeCondJump_at(address address) { + NativeCondJump* jump = (NativeCondJump*)(address); + return jump; +} + + + +inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } + +inline bool NativeInstruction::is_call() { + // jal target + // nop + if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + return true; + } + + // nop + // nop + // nop + // nop + // jal target + // nop + if ( is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return true; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) && + is_special_op(int_at(24), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op 
(int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + + //daddiu dst, R0, imm16 + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return true; + } + + if(is_trampoline_call()) + return true; + + return false; + +} + +inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} + +inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } + +// Call trampoline stubs. 
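+//
+// The stub is only two instruction words long and holds the 64-bit destination
+// address as data at its start: destination() reads ptr_at(0) and set_destination()
+// rewrites it. A far call reaches that destination through the lui/ori/dsll/ld/jalr
+// sequence recognized by is_trampoline_call() below, so retargeting such a call
+// only requires patching the stub's data word, not the call site itself.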
+class NativeCallTrampolineStub : public NativeInstruction { + public: + + enum mips_specific_constants { + instruction_size = 2 * BytesPerInstWord, + instruction_offset = 0, + next_instruction_offset = 2 * BytesPerInstWord + }; + + address destination() const { + return (address)ptr_at(0); + } + + void set_destination(address new_destination) { + set_ptr_at(0, (intptr_t)new_destination); + } +}; + +inline bool NativeInstruction::is_trampoline_call() { + // lui dst, imm16 + // ori dst, dst, imm16 + // dsll dst, dst, 16 + // ld target, dst, imm16 + // jalr target + // nop + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ld_op) && + is_special_op(int_at(16), Assembler::jalr_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return true; + } + + return false; +} + +inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { + return (NativeCallTrampolineStub*)addr; +} + +#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/registerMap_mips.hpp b/hotspot/src/cpu/mips/vm/registerMap_mips.hpp new file mode 100644 index 00000000000..7f800eb1070 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/registerMap_mips.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP + +// machine-dependent implemention for register maps + friend class frame; + + private: +#ifndef CORE + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. + // Since there is none, we just return NULL. + // See registerMap_sparc.hpp for an example of grabbing registers + // from register save areas of a standard layout. + address pd_location(VMReg reg) const {return NULL;} +#endif + + // no PD state to clear or copy: + void pd_clear() {} + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp b/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp new file mode 100644 index 00000000000..4af25318346 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/register.hpp" +#include "register_mips.hpp" +#ifdef TARGET_ARCH_MODEL_mips_32 +# include "interp_masm_mips_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_mips_64 +# include "interp_masm_mips_64.hpp" +#endif + +REGISTER_DEFINITION(Register, noreg); +REGISTER_DEFINITION(Register, i0); +REGISTER_DEFINITION(Register, i1); +REGISTER_DEFINITION(Register, i2); +REGISTER_DEFINITION(Register, i3); +REGISTER_DEFINITION(Register, i4); +REGISTER_DEFINITION(Register, i5); +REGISTER_DEFINITION(Register, i6); +REGISTER_DEFINITION(Register, i7); +REGISTER_DEFINITION(Register, i8); +REGISTER_DEFINITION(Register, i9); +REGISTER_DEFINITION(Register, i10); +REGISTER_DEFINITION(Register, i11); +REGISTER_DEFINITION(Register, i12); +REGISTER_DEFINITION(Register, i13); +REGISTER_DEFINITION(Register, i14); +REGISTER_DEFINITION(Register, i15); +REGISTER_DEFINITION(Register, i16); +REGISTER_DEFINITION(Register, i17); +REGISTER_DEFINITION(Register, i18); +REGISTER_DEFINITION(Register, i19); +REGISTER_DEFINITION(Register, i20); +REGISTER_DEFINITION(Register, i21); +REGISTER_DEFINITION(Register, i22); +REGISTER_DEFINITION(Register, i23); +REGISTER_DEFINITION(Register, i24); +REGISTER_DEFINITION(Register, i25); +REGISTER_DEFINITION(Register, i26); +REGISTER_DEFINITION(Register, i27); +REGISTER_DEFINITION(Register, i28); +REGISTER_DEFINITION(Register, i29); +REGISTER_DEFINITION(Register, i30); +REGISTER_DEFINITION(Register, i31); + +REGISTER_DEFINITION(FloatRegister, fnoreg); +REGISTER_DEFINITION(FloatRegister, f0); +REGISTER_DEFINITION(FloatRegister, f1); +REGISTER_DEFINITION(FloatRegister, f2); +REGISTER_DEFINITION(FloatRegister, f3); +REGISTER_DEFINITION(FloatRegister, f4); +REGISTER_DEFINITION(FloatRegister, f5); +REGISTER_DEFINITION(FloatRegister, f6); +REGISTER_DEFINITION(FloatRegister, f7); +REGISTER_DEFINITION(FloatRegister, f8); +REGISTER_DEFINITION(FloatRegister, f9); +REGISTER_DEFINITION(FloatRegister, f10); +REGISTER_DEFINITION(FloatRegister, f11); +REGISTER_DEFINITION(FloatRegister, f12); +REGISTER_DEFINITION(FloatRegister, f13); +REGISTER_DEFINITION(FloatRegister, f14); +REGISTER_DEFINITION(FloatRegister, f15); +REGISTER_DEFINITION(FloatRegister, f16); +REGISTER_DEFINITION(FloatRegister, f17); +REGISTER_DEFINITION(FloatRegister, f18); +REGISTER_DEFINITION(FloatRegister, f19); +REGISTER_DEFINITION(FloatRegister, f20); +REGISTER_DEFINITION(FloatRegister, f21); 
+REGISTER_DEFINITION(FloatRegister, f22); +REGISTER_DEFINITION(FloatRegister, f23); +REGISTER_DEFINITION(FloatRegister, f24); +REGISTER_DEFINITION(FloatRegister, f25); +REGISTER_DEFINITION(FloatRegister, f26); +REGISTER_DEFINITION(FloatRegister, f27); +REGISTER_DEFINITION(FloatRegister, f28); +REGISTER_DEFINITION(FloatRegister, f29); +REGISTER_DEFINITION(FloatRegister, f30); +REGISTER_DEFINITION(FloatRegister, f31); diff --git a/hotspot/src/cpu/mips/vm/register_mips.cpp b/hotspot/src/cpu/mips/vm/register_mips.cpp new file mode 100644 index 00000000000..4a9b22bfef2 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/register_mips.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "register_mips.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + + 2 * FloatRegisterImpl::number_of_registers; + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", + "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", + }; + return is_valid() ? names[encoding()] : "fnoreg"; +} + diff --git a/hotspot/src/cpu/mips/vm/register_mips.hpp b/hotspot/src/cpu/mips/vm/register_mips.hpp new file mode 100644 index 00000000000..88bf2d68cc9 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/register_mips.hpp @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP +#define CPU_MIPS_VM_REGISTER_MIPS_HPP + +#include "asm/register.hpp" +#include "vm_version_mips.hpp" + +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + + +// The implementation of integer registers for the mips architecture +inline Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32 + }; + + // derived registers, offsets, and addresses + Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend Register as_Register(int encoding); + + VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(),err_msg( "invalid register (%d)", (int)(intptr_t)this)); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; +}; + + +// The integer registers of the MIPS32 architecture +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + +CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); +CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); +CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); +CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); +CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); +CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); +CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); +CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); +CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); +CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); +CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); +CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); +CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); +CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); +CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); +CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); +CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); +CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); + 
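+// The generic names i0..i31 follow the hardware encoding; they correspond to the
+// MIPS ABI names zero, at, v0, v1, a0..a7, t0..t3, s0..s7, t8, t9, k0, k1, gp, sp,
+// fp (s8), ra listed in register_mips.cpp, and the aliases defined below
+// (R0, AT, V0, V1, RA0..RA7, RT0..RT3, S0..S7, RT8, RT9, K0, K1, GP, SP, FP, RA)
+// follow the same order.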
+#ifndef DONT_USE_REGISTER_DEFINES +#define NOREG ((Register)(noreg_RegisterEnumValue)) + +#define I0 ((Register)(i0_RegisterEnumValue)) +#define I1 ((Register)(i1_RegisterEnumValue)) +#define I2 ((Register)(i2_RegisterEnumValue)) +#define I3 ((Register)(i3_RegisterEnumValue)) +#define I4 ((Register)(i4_RegisterEnumValue)) +#define I5 ((Register)(i5_RegisterEnumValue)) +#define I6 ((Register)(i6_RegisterEnumValue)) +#define I7 ((Register)(i7_RegisterEnumValue)) +#define I8 ((Register)(i8_RegisterEnumValue)) +#define I9 ((Register)(i9_RegisterEnumValue)) +#define I10 ((Register)(i10_RegisterEnumValue)) +#define I11 ((Register)(i11_RegisterEnumValue)) +#define I12 ((Register)(i12_RegisterEnumValue)) +#define I13 ((Register)(i13_RegisterEnumValue)) +#define I14 ((Register)(i14_RegisterEnumValue)) +#define I15 ((Register)(i15_RegisterEnumValue)) +#define I16 ((Register)(i16_RegisterEnumValue)) +#define I17 ((Register)(i17_RegisterEnumValue)) +#define I18 ((Register)(i18_RegisterEnumValue)) +#define I19 ((Register)(i19_RegisterEnumValue)) +#define I20 ((Register)(i20_RegisterEnumValue)) +#define I21 ((Register)(i21_RegisterEnumValue)) +#define I22 ((Register)(i22_RegisterEnumValue)) +#define I23 ((Register)(i23_RegisterEnumValue)) +#define I24 ((Register)(i24_RegisterEnumValue)) +#define I25 ((Register)(i25_RegisterEnumValue)) +#define I26 ((Register)(i26_RegisterEnumValue)) +#define I27 ((Register)(i27_RegisterEnumValue)) +#define I28 ((Register)(i28_RegisterEnumValue)) +#define I29 ((Register)(i29_RegisterEnumValue)) +#define I30 ((Register)(i30_RegisterEnumValue)) +#define I31 ((Register)(i31_RegisterEnumValue)) + +#define R0 ((Register)(i0_RegisterEnumValue)) +#define AT ((Register)(i1_RegisterEnumValue)) +#define V0 ((Register)(i2_RegisterEnumValue)) +#define V1 ((Register)(i3_RegisterEnumValue)) +#define RA0 ((Register)(i4_RegisterEnumValue)) +#define RA1 ((Register)(i5_RegisterEnumValue)) +#define RA2 ((Register)(i6_RegisterEnumValue)) +#define RA3 ((Register)(i7_RegisterEnumValue)) +#define RA4 ((Register)(i8_RegisterEnumValue)) +#define RA5 ((Register)(i9_RegisterEnumValue)) +#define RA6 ((Register)(i10_RegisterEnumValue)) +#define RA7 ((Register)(i11_RegisterEnumValue)) +#define RT0 ((Register)(i12_RegisterEnumValue)) +#define RT1 ((Register)(i13_RegisterEnumValue)) +#define RT2 ((Register)(i14_RegisterEnumValue)) +#define RT3 ((Register)(i15_RegisterEnumValue)) +#define S0 ((Register)(i16_RegisterEnumValue)) +#define S1 ((Register)(i17_RegisterEnumValue)) +#define S2 ((Register)(i18_RegisterEnumValue)) +#define S3 ((Register)(i19_RegisterEnumValue)) +#define S4 ((Register)(i20_RegisterEnumValue)) +#define S5 ((Register)(i21_RegisterEnumValue)) +#define S6 ((Register)(i22_RegisterEnumValue)) +#define S7 ((Register)(i23_RegisterEnumValue)) +#define RT8 ((Register)(i24_RegisterEnumValue)) +#define RT9 ((Register)(i25_RegisterEnumValue)) +#define K0 ((Register)(i26_RegisterEnumValue)) +#define K1 ((Register)(i27_RegisterEnumValue)) +#define GP ((Register)(i28_RegisterEnumValue)) +#define SP ((Register)(i29_RegisterEnumValue)) +#define FP ((Register)(i30_RegisterEnumValue)) +#define S8 ((Register)(i30_RegisterEnumValue)) +#define RA ((Register)(i31_RegisterEnumValue)) + +#define c_rarg0 RT0 +#define c_rarg1 RT1 +#define Rmethod S3 +#define Rsender S4 +#define Rnext S1 + +/* +#define RT0 T0 +#define RT1 T1 +#define RT2 T2 +#define RT3 T3 +#define RT4 T8 +#define RT5 T9 +*/ + + +//for interpreter frame +// bytecode pointer register +#define BCP S0 +// local variable pointer register 
+#define LVP S7 +// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM +// be sure to save and restore its value in call_stub +#define TSR S2 + +//OPT_SAFEPOINT not supported yet +#define OPT_SAFEPOINT 1 + +#define OPT_THREAD 1 + +#define TREG S6 + +#define S5_heapbase S5 + +#define mh_SP_save SP + +#define FSR V0 +#define SSR V1 +#define FSF F0 +#define SSF F1 +#define FTF F14 +#define STF F15 + +#define AFT F30 + +#define RECEIVER T0 +#define IC_Klass T1 + +#define SHIFT_count T3 + +#endif // DONT_USE_REGISTER_DEFINES + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the mips architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + float_arg_base = 12, + number_of_registers = 32 + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) +#define F0 ((FloatRegister)( 
f0_FloatRegisterEnumValue)) +#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) +#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) +#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) +#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) +#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) +#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) +#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) +#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) +#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) +#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) +#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) +#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) +#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) +#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) +#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) +#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) +#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) +#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) +#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) +#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) +#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) +#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) +#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) +#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) +#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) +#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) +#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) +#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) +#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) +#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) +#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) +#endif // DONT_USE_REGISTER_DEFINES + + +const int MIPS_ARGS_IN_REGS_NUM = 4; + +// Need to know the total number of registers of all sorts for SharedInfo. +// Define a class that exports it. +class ConcreteRegisterImpl : public AbstractRegisterImpl { + public: + enum { + // A big enough number for C2: all the registers plus flags + // This number must be large enough to cover REG_COUNT (defined by c2) registers. + // There is no requirement that any ordering here matches any ordering c2 gives + // it's optoregs. + number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 + }; + + static const int max_gpr; + static const int max_fpr; +}; + +#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp b/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp new file mode 100644 index 00000000000..cae43b2d96b --- /dev/null +++ b/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/relocInfo.hpp" +#include "compiler/disassembler.hpp" +#include "nativeInst_mips.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/safepoint.hpp" + + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + x += o; + typedef Assembler::WhichOperand WhichOperand; + WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop + assert(which == Assembler::disp32_operand || + which == Assembler::narrow_oop_operand || + which == Assembler::imm_operand, "format unpacks ok"); + if (which == Assembler::imm_operand) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); + } + } else if (which == Assembler::narrow_oop_operand) { + // both compressed oops and compressed classes look the same + if (Universe::heap()->is_in_reserved((oop)x)) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)oopDesc::encode_heap_oop((oop)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(oopDesc::encode_heap_oop((oop)x)), (intptr_t)(x)); + } + } else { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); + } + } + } else { + // Note: Use runtime_call_type relocations for call32_operand. 
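+    // A call32_operand would imply a pc-relative 32-bit displacement. This port
+    // appears to materialize destinations as absolute immediates instead
+    // (NativeMovConstReg / patchable_set48), so that format never reaches this
+    // point and is trapped below.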
+ assert(0, "call32_operand not supported in MIPS64"); + } +} + + +//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, +//Maybe We should FORGET CALL RELOCATION +address Relocation::pd_call_destination(address orig_addr) { + intptr_t adj = 0; + NativeInstruction* ni = nativeInstruction_at(addr()); + if (ni->is_call()) { + if (!ni->is_trampoline_call()) { + return nativeCall_at(addr())->target_addr_for_insn(); + } else { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + return nativeCallTrampolineStub_at(trampoline)->destination(); + } else { + return (address) -1; + } + } + } else if (ni->is_jump()) { + return nativeGeneralJump_at(addr())->jump_destination() + adj; + } else if (ni->is_cond_jump()) { + return nativeCondJump_at(addr())->jump_destination() +adj; + } else { + tty->print_cr("\nError!\ncall destination: 0x%lx", p2i(addr())); + Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); + ShouldNotReachHere(); + return NULL; + } +} + + +void Relocation::pd_set_call_destination(address x) { + NativeInstruction* ni = nativeInstruction_at(addr()); + if (ni->is_call()) { + NativeCall* call = nativeCall_at(addr()); + if (!ni->is_trampoline_call()) { + call->set_destination(x); + } else { + address trampoline_stub_addr = call->get_trampoline(); + if (trampoline_stub_addr != NULL) { + address orig = call->target_addr_for_insn(); + if (orig != trampoline_stub_addr) { + call->patch_on_trampoline(trampoline_stub_addr); + } + call->set_destination_mt_safe(x, false); + } + } + } else if (ni->is_jump()) + nativeGeneralJump_at(addr())->set_jump_destination(x); + else if (ni->is_cond_jump()) + nativeCondJump_at(addr())->set_jump_destination(x); + else + { ShouldNotReachHere(); } + + // Unresolved jumps are recognized by a destination of -1 + // However 64bit can't actually produce such an address + // and encodes a jump to self but jump_destination will + // return a -1 as the signal. We must not relocate this + // jmp or the ic code will not see it as unresolved. +} + + +address* Relocation::pd_address_in_code() { + return (address*)addr(); +} + + +address Relocation::pd_get_address_from_code() { + NativeMovConstReg* ni = nativeMovConstReg_at(addr()); + return (address)ni->data(); +} + + + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { + address target =0; + NativeMovConstReg* ni = nativeMovConstReg_at(addr()); + target = new_addr_for((address)ni->data(), src, dest); + ni->set_data((intptr_t)target); +} + +void metadata_Relocation::pd_fix_value(address x) { +} diff --git a/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp b/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp new file mode 100644 index 00000000000..04ad5dac96c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP +#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Since MIPS instructions are whole words, + // the two low-order offset bits can always be discarded. + offset_unit = 4, + + // imm_oop_operand vs. narrow_oop_operand + format_width = 2 + }; + +#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp b/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp new file mode 100644 index 00000000000..bb9269b423f --- /dev/null +++ b/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "classfile/systemDictionary.hpp" +#include "code/vmreg.hpp" +#include "interpreter/interpreter.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_mips.inline.hpp" +#endif + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +//-------------- generate_exception_blob ----------- +// creates _exception_blob. +// The exception blob is jumped to from a compiled method. 
+// (see emit_exception_handler in sparc.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jump, and left with a jump. +// +// Arguments: +// V0: exception oop +// V1: exception pc +// +// Results: +// A0: exception oop +// A1: exception pc in caller or ??? +// jumps to: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// +// [stubGenerator_mips.cpp] generate_forward_exception() +// |- V0, V1 are created +// |- T9 <= SharedRuntime::exception_handler_for_return_address +// `- jr T9 +// `- the caller's exception_handler +// `- jr OptoRuntime::exception_blob +// `- here +// +void OptoRuntime::generate_exception_blob() { + // Capture info about frame layout + enum layout { + fp_off, + return_off, // slot for return address + framesize + }; + + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer("exception_blob", 5120, 5120); + MacroAssembler* masm = new MacroAssembler(&buffer); + + + address start = __ pc(); + + __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! + + // this frame will be treated as the original caller method. + // So, the return pc should be filled with the original exception pc. + // ref: X86's implementation + __ sd(V1, SP, return_off *wordSize); // return address + __ sd(FP, SP, fp_off *wordSize); + + // Save callee saved registers. None for UseSSE=0, + // floats-only for UseSSE=1, and doubles for UseSSE=2. + + __ daddiu(FP, SP, fp_off * wordSize); + + // Store exception in Thread object. We cannot pass any arguments to the + // handle_exception call, since we do not want to make any assumption + // about the size of the frame where the exception happened in. + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); + __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); + + // This call does all the hard work. It checks if an exception handler + // exists in the method. + // If so, it returns the handler address. + // If not, it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. + __ set_last_Java_frame(thread, NOREG, NOREG, NULL); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ relocate(relocInfo::internal_pc_type); + + { + long save_pc = (long)__ pc() + 48; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); + + __ move(A0, thread); + __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); + __ jalr(T9); + __ delayed()->nop(); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap( framesize, 0 ); + + oop_maps->add_gc_map( __ offset(), map); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(thread, true); + + // Pop self-frame. + __ leave(); // Epilog! 
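+  // handle_exception_C (called above) left its result in V0. The code below
+  // moves it to T9, reloads the exception oop/pc from the thread, clears the
+  // oop as a GC root, and jumps with A0/V0 = exception oop and A1/V1 =
+  // exception pc, matching the contract in the header comment.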
+ + // V0: exception handler + + // We have a handler in V0, (could be deopt blob) + __ move(T9, V0); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // Get the exception + __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); + // Get the exception pc in case we are deoptimized + __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); +#ifdef ASSERT + __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); + __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); +#endif + // Clear the exception oop so GC no longer processes it as a root. + __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); + + // Fix seg fault when running: + // Eclipse + Plugin + Debug As + // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() + // + __ move(V0, A0); + __ move(V1, A1); + + // V0: exception oop + // T9: exception handler + // A1: exception pc + __ jr(T9); + __ delayed()->nop(); + + // make sure all code is generated + masm->flush(); + + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); +} diff --git a/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp b/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp new file mode 100644 index 00000000000..daf04c44229 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp @@ -0,0 +1,3816 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/compiledICHolder.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_mips.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +#include + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + +class RegisterSaver { + enum { FPU_regs_live = 32 }; + // Capture info about frame layout + enum layout { +#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, + DEF_LAYOUT_OFFS(for_16_bytes_aligned) + DEF_LAYOUT_OFFS(fpr0) + DEF_LAYOUT_OFFS(fpr1) + DEF_LAYOUT_OFFS(fpr2) + DEF_LAYOUT_OFFS(fpr3) + DEF_LAYOUT_OFFS(fpr4) + DEF_LAYOUT_OFFS(fpr5) + DEF_LAYOUT_OFFS(fpr6) + DEF_LAYOUT_OFFS(fpr7) + DEF_LAYOUT_OFFS(fpr8) + DEF_LAYOUT_OFFS(fpr9) + DEF_LAYOUT_OFFS(fpr10) + DEF_LAYOUT_OFFS(fpr11) + DEF_LAYOUT_OFFS(fpr12) + DEF_LAYOUT_OFFS(fpr13) + DEF_LAYOUT_OFFS(fpr14) + DEF_LAYOUT_OFFS(fpr15) + DEF_LAYOUT_OFFS(fpr16) + DEF_LAYOUT_OFFS(fpr17) + DEF_LAYOUT_OFFS(fpr18) + DEF_LAYOUT_OFFS(fpr19) + DEF_LAYOUT_OFFS(fpr20) + DEF_LAYOUT_OFFS(fpr21) + DEF_LAYOUT_OFFS(fpr22) + DEF_LAYOUT_OFFS(fpr23) + DEF_LAYOUT_OFFS(fpr24) + DEF_LAYOUT_OFFS(fpr25) + DEF_LAYOUT_OFFS(fpr26) + DEF_LAYOUT_OFFS(fpr27) + DEF_LAYOUT_OFFS(fpr28) + DEF_LAYOUT_OFFS(fpr29) + DEF_LAYOUT_OFFS(fpr30) + DEF_LAYOUT_OFFS(fpr31) + + DEF_LAYOUT_OFFS(v0) + DEF_LAYOUT_OFFS(v1) + DEF_LAYOUT_OFFS(a0) + DEF_LAYOUT_OFFS(a1) + DEF_LAYOUT_OFFS(a2) + DEF_LAYOUT_OFFS(a3) + DEF_LAYOUT_OFFS(a4) + DEF_LAYOUT_OFFS(a5) + DEF_LAYOUT_OFFS(a6) + DEF_LAYOUT_OFFS(a7) + DEF_LAYOUT_OFFS(t0) + DEF_LAYOUT_OFFS(t1) + DEF_LAYOUT_OFFS(t2) + DEF_LAYOUT_OFFS(t3) + DEF_LAYOUT_OFFS(s0) + DEF_LAYOUT_OFFS(s1) + DEF_LAYOUT_OFFS(s2) + DEF_LAYOUT_OFFS(s3) + DEF_LAYOUT_OFFS(s4) + DEF_LAYOUT_OFFS(s5) + DEF_LAYOUT_OFFS(s6) + DEF_LAYOUT_OFFS(s7) + DEF_LAYOUT_OFFS(t8) + DEF_LAYOUT_OFFS(t9) + + DEF_LAYOUT_OFFS(gp) + DEF_LAYOUT_OFFS(fp) + DEF_LAYOUT_OFFS(return) + reg_save_size + }; + + public: + + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); + static int raOffset(void) { return return_off / 2; } + //Rmethod + static int methodOffset(void) { return s3_off / 2; } + + static int v0Offset(void) { return v0_off / 2; } + static int v1Offset(void) { return v1_off / 2; } + + static int fpResultOffset(void) { return fpr0_off / 2; } + + // During deoptimization only the result register need to be restored + // all the other values have already been extracted. 
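+  // (Each DEF_LAYOUT_OFFS entry above defines a pair of 32-bit slots per
+  // register, which is why the word-sized accessors divide the *_off values
+  // by 2.)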
+ static void restore_result_registers(MacroAssembler* masm); +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { + + // Always make the frame size 16-byte aligned + int frame_size_in_bytes = round_to(additional_frame_words*wordSize + + reg_save_size*BytesPerInt, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; + // The caller will allocate additional_frame_words + int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; + // CodeBlob frame size is in words. + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + + // save registers + + __ daddiu(SP, SP, - reg_save_size * jintSize); + + __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); + __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); + __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); + __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); + __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); + __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); + __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); + __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); + __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); + __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); + __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); + __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); + __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); + __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); + __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); + __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); + __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); + __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); + __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); + __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); + __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); + __ sd(T0, SP, t0_off * jintSize); + __ sd(T1, SP, t1_off * jintSize); + __ sd(T2, SP, t2_off * jintSize); + __ sd(T3, SP, t3_off * jintSize); + __ sd(S0, SP, s0_off * jintSize); + __ sd(S1, SP, s1_off * jintSize); + __ sd(S2, SP, s2_off * jintSize); + __ sd(S3, SP, s3_off * jintSize); + __ sd(S4, SP, s4_off * jintSize); + __ sd(S5, SP, s5_off * jintSize); + __ sd(S6, SP, s6_off * jintSize); + __ sd(S7, SP, s7_off * jintSize); + + __ sd(T8, SP, t8_off * jintSize); + __ sd(T9, SP, t9_off * jintSize); + + __ sd(GP, SP, gp_off * jintSize); + __ sd(FP, SP, fp_off * jintSize); + __ sd(RA, SP, return_off * jintSize); + __ daddiu(FP, SP, fp_off * jintSize); + + OopMapSet *oop_maps = new OopMapSet(); + //OopMap* map = new OopMap( frame_words, 0 ); + OopMap* map = new OopMap( frame_size_in_slots, 0 ); + + +//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) +#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) + map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); + 
map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); + + map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); + 
map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); + +#undef STACK_OFFSET + return map; +} + + +// Pop the current frame and restore all the registers that we +// saved. +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { + __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); + __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); + __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); + __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); + __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); + __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); + __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); + __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); + __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); + __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); + __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); + __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); + __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); + __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); + __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); + __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); + + __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); + __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); + __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); + __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); + __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); + __ ld(T0, SP, t0_off * jintSize); + __ ld(T1, SP, t1_off * jintSize); + __ ld(T2, SP, t2_off * jintSize); + __ ld(T3, SP, t3_off * jintSize); + __ ld(S0, SP, s0_off * jintSize); + __ ld(S1, SP, s1_off * jintSize); + __ ld(S2, SP, s2_off * jintSize); + __ ld(S3, SP, s3_off * jintSize); + __ ld(S4, SP, s4_off * jintSize); + __ ld(S5, SP, s5_off * jintSize); + __ ld(S6, SP, s6_off * jintSize); + __ ld(S7, SP, s7_off * jintSize); + + __ ld(T8, SP, t8_off * jintSize); + __ ld(T9, SP, t9_off * jintSize); + + __ ld(GP, SP, gp_off * jintSize); + __ ld(FP, SP, fp_off * jintSize); + __ ld(RA, SP, return_off * jintSize); + + __ addiu(SP, SP, reg_save_size * jintSize); +} + +// Pop the current frame and restore the registers that might be holding +// a result. +void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + + // Just restore result register. Only used by deoptimization. By + // now any callee save register that needs to be restore to a c2 + // caller of the deoptee has been extracted into the vframeArray + // and will be stuffed into the c2i adapter we create for later + // restoration so only result registers need to be restored here. 
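+  // V0/V1 (FSR/SSR in the shorthand defines) and F0/F1 (FSF/SSF) are the only
+  // registers that can carry a return value, so they are all that is reloaded: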
+ + __ ld(V0, SP, v0_off * jintSize); + __ ld(V1, SP, v1_off * jintSize); + __ ldc1(F0, SP, fpr0_off * jintSize); + __ ldc1(F1, SP, fpr1_off * jintSize); + __ addiu(SP, SP, return_off * jintSize); +} + +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} + +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. + +static int reg2offset_in(VMReg r) { + // Account for saved fp and return address + // This should really be in_preserve_stack_slots + return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); +} + +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than SharedInfo::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register +// up to RegisterImpl::number_of_registers) are the 32-bit +// integer registers. + +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +// Note: the INPUTS in sig_bt are in units of Java argument words, which are +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit +// units regardless of build. + + +// --------------------------------------------------------------------------- +// The compiled Java calling convention. +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + + // Create the mapping between argument positions and registers. 
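+  // Worked example (illustrative): for an instance method
+  //   int m(int i, long l, double d, float f)
+  // sig_bt is { OBJECT, INT, LONG, VOID, DOUBLE, VOID, FLOAT } and, because a
+  // single 'args' counter is shared between the integer and float banks below,
+  // the mapping is receiver -> T0, i -> A0, l -> A1, d -> F15 (not F12),
+  // f -> F16. Overflow arguments take two 32-bit stack slots each.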
+ static const Register INT_ArgReg[Argument::n_register_parameters] = { + T0, A0, A1, A2, A3, A4, A5, A6 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + F12, F13, F14, F15, F16, F17, F18, F19 + }; + + uint args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { + Label L; + __ verify_oop(Rmethod); + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + // Schedule the branch target address early. + // Call into the VM to patch the caller, then jump to compiled callee + // V0 isn't live so capture return address while we easily can + __ move(V0, RA); + + __ pushad(); +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // VM needs caller's callsite + // VM needs target method + + __ move(A0, Rmethod); + __ move(A1, V0); + // we should preserve the return address + __ verify_oop(Rmethod); + __ move(S0, SP); + __ move(AT, -(StackAlignmentInBytes)); // align the stack + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), + relocInfo::runtime_call_type); + + __ delayed()->nop(); + __ move(SP, S0); + __ popad(); + __ bind(L); +} + +static void gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup) { + + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + // However we will run interpreted if we come thru here. The next pass + // thru the call site will run compiled. If we ran compiled here then + // we can (theorectically) do endless i2c->c2i->i2c transitions during + // deopt/uncommon trap cycles. 
If we always go interpreted here then + // we can have at most one and don't need to play any tricks to keep + // from endlessly growing the stack. + // + // Actually if we detected that we had an i2c->c2i transition here we + // ought to be able to reset the world back to the state of the interpreted + // call and not bother building another interpreter arg area. We don't + // do that at this point. + + patch_callers_callsite(masm); + __ bind(skip_fixup); + +#ifdef COMPILER2 + __ empty_FPU_stack(); +#endif + //this is for native ? + // Since all args are passed on the stack, total_args_passed * interpreter_ + // stack_element_size is the + // space we need. + int extraspace = total_args_passed * Interpreter::stackElementSize; + + // stack is aligned, keep it that way + extraspace = round_to(extraspace, 2*wordSize); + + // Get return address + __ move(V0, RA); + // set senderSP value + //refer to interpreter_mips.cpp:generate_asm_entry + __ move(Rsender, SP); + __ addiu(SP, SP, -extraspace); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // st_off points to lowest address on stack. + int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; + // Say 4 args: + // i st_off + // 0 12 T_LONG + // 1 8 T_VOID + // 2 4 T_OBJECT + // 3 0 T_BOOL + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use fpu stack top + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + __ ld_ptr(AT, SP, ld_off); + __ st_ptr(AT, SP, st_off); + + } else { + + + int next_off = st_off - Interpreter::stackElementSize; + __ ld_ptr(AT, SP, ld_off); + __ st_ptr(AT, SP, st_off); + + // Ref to is_Register condition + if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ st_ptr(AT, SP, st_off - 8); + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + __ sd(r, SP, st_off); + } else { + //FIXME, mips will not enter here + // long/double in gpr + __ sd(r, SP, st_off); + // In [java/util/zip/ZipFile.java] + // + // private static native long open(String name, int mode, long lastModified); + // private static native int getTotal(long jzfile); + // + // We need to transfer T_LONG paramenters from a compiled method to a native method. + // It's a complex process: + // + // Caller -> lir_static_call -> gen_resolve_stub + // -> -- resolve_static_call_C + // `- gen_c2i_adapter() [*] + // | + // `- AdapterHandlerLibrary::get_create_apapter_index + // -> generate_native_entry + // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] + // + // In [**], T_Long parameter is stored in stack as: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // However, the sequence is reversed here: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | | + // (low) + // + // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). 
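+        // In short: the 64-bit value is written to both the value slot and the
+        // adjacent (T_VOID) slot, so the interpreter side finds a valid copy
+        // whichever of the two layouts it assumes.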
+ // + if (sig_bt[i] == T_LONG) + __ sd(r, SP, st_off - 8); + } + } else if (r_1->is_FloatRegister()) { + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ swc1(fr, SP, st_off); + else { + __ sdc1(fr, SP, st_off); + __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots + } + } + } + + // Schedule the branch target address early. + __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); + // And repush original return address + __ move(RA, V0); + __ jr (AT); + __ delayed()->nop(); +} + +static void gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + + // Generate an I2C adapter: adjust the I-frame to make space for the C-frame + // layout. Lesp was saved by the calling I-frame and will be restored on + // return. Meanwhile, outgoing arg space is all owned by the callee + // C-frame, so we can mangle it at will. After adjusting the frame size, + // hoist register arguments and repack other args according to the compiled + // code convention. Finally, end in a jump to the compiled code. The entry + // point address is the start of the buffer. + + // We will only enter here from an interpreted frame and never from after + // passing thru a c2i. Azul allowed this but we do not. If we lose the + // race and use a c2i we will remain interpreted for the race loser(s). + // This removes all sorts of headaches on the mips side and also eliminates + // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. + + + __ move(T9, SP); + + // Cut-out for having no stack args. Since up to 2 int/oop args are passed + // in registers, we will occasionally have no stack args. + int comp_words_on_stack = 0; + if (comp_args_on_stack) { + // Sig words on the stack are greater-than VMRegImpl::stack0. Those in + // registers are below. By subtracting stack0, we either get a negative + // number (all values in registers) or the maximum stack slot accessed. + // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); + // Convert 4-byte stack slots to words. + comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; + // Round up to miminum stack alignment, in wordSize + comp_words_on_stack = round_to(comp_words_on_stack, 2); + __ daddiu(SP, SP, -comp_words_on_stack * wordSize); + } + + // Align the outgoing SP + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + // push the return address on the stack (note that pushing, rather + // than storing it, yields the correct frame alignment for the callee) + // Put saved SP in another register + const Register saved_sp = V0; + __ move(saved_sp, T9); + + + // Will jump to the compiled code just as if compiled code was doing it. + // Pre-load the register-jump target early, to schedule it better. + __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); + + // Now generate the shuffle code. Pick up all register args and move the + // rest through the floating point stack top. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from SP+offset. 
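+    // ld_off indexes the interpreter argument area through saved_sp (the
+    // interpreter caller's SP, captured via T9 at the top of this adapter);
+    // st_off is a slot in the compiled outgoing area carved out above by
+    // lowering and realigning SP.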
+ + //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; + // Point to interpreter value (vs. tag) + int next_off = ld_off - Interpreter::stackElementSize; + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to + // account for return address ) + // NOTICE HERE!!!! I sub a wordSize here + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + //+ wordSize; + + if (!r_2->is_valid()) { + __ ld(AT, saved_sp, ld_off); + __ sd(AT, SP, st_off); + } else { + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + // st_off is LSW (i.e. reg.first()) + + // [./org/eclipse/swt/graphics/GC.java] + // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, + // int destX, int destY, int destWidth, int destHeight, + // boolean simple, + // int imgWidth, int imgHeight, + // long maskPixmap, <-- Pass T_LONG in stack + // int maskType); + // Before this modification, Eclipse displays icons with solid black background. + // + __ ld(AT, saved_sp, ld_off); + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ ld(AT, saved_sp, ld_off - 8); + __ sd(AT, SP, st_off); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + if (r_2->is_valid()) { + // Remember r_1 is low address (and LSB on mips) + // So r_2 gets loaded from high address regardless of the platform + assert(r_2->as_Register() == r_1->as_Register(), ""); + __ ld(r, saved_sp, ld_off); + + // + // For T_LONG type, the real layout is as below: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // We should load the low-8 bytes. + // + if (sig_bt[i] == T_LONG) + __ ld(r, saved_sp, ld_off - 8); + } else { + __ lw(r, saved_sp, ld_off); + } + } else if (r_1->is_FloatRegister()) { // Float Register + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ lwc1(fr, saved_sp, ld_off); + else { + __ ldc1(fr, saved_sp, ld_off); + __ ldc1(fr, saved_sp, ld_off - 8); + } + } + } + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. + __ get_thread(T8); + __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset())); + + // move methodOop to V0 in case we end up in an c2i adapter. + // the c2i adapters expect methodOop in V0 (c2) because c2's + // resolve stubs return the result (the method) in V0. + // I'd love to fix this. 
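+  // Register choreography: the interpreter SP was staged through T9 into
+  // saved_sp (V0) before T9 was repurposed for the jump target, and V0 itself
+  // is only clobbered below, after the last load from saved_sp.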
+ __ move(V0, Rmethod); + __ jr(T9); + __ delayed()->nop(); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry = __ pc(); + + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know G5 holds the methodOop. The + // args start out packed in the compiled layout. They need to be unpacked + // into the interpreter layout. This will almost always require some stack + // space. We grow the current (compiled) stack, then repack the args. We + // finally end in a jump to the generic interpreter entry point. On exit + // from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relys solely on SP and not FP, get sick). + + address c2i_unverified_entry = __ pc(); + Label skip_fixup; + { + Register holder = T1; + Register receiver = T0; + Register temp = T8; + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + Label missed; + + __ verify_oop(holder); + //add for compressedoops + __ load_klass(temp, receiver); + __ verify_oop(temp); + + __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); + __ bne(AT, temp, missed); + __ delayed()->nop(); + // Method might have been compiled since the call site was patched to + // interpreted if that is the case treat it as a miss so we can get + // the call site corrected. + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, skip_fixup); + __ delayed()->nop(); + __ bind(missed); + + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ delayed()->nop(); + } + + address c2i_entry = __ pc(); + + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on MIPS"); + // Return the number of VMReg stack_slots needed for the args. + // This value does not include an abi space (like register window + // save area). + + // We return the amount of VMReg stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. Since we always + // have space for storing at least 6 registers to memory we start with that. + // See int_stk_helper for a further discussion. + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. 
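+  // Unlike java_calling_convention above, the native convention has no T0
+  // slot: integer arguments start at A0 (the first one being the JNIEnv*), so
+  // every incoming Java register argument shifts down by at least one
+  // position; see the forkAndExec example below.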
+ static const Register INT_ArgReg[Argument::n_register_parameters] = { + A0, A1, A2, A3, A4, A5, A6, A7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + F12, F13, F14, F15, F16, F17, F18, F19 + }; + uint args = 0; + uint stk_args = 0; // inc by 2 each time + +// Example: +// n java.lang.UNIXProcess::forkAndExec +// private native int forkAndExec(byte[] prog, +// byte[] argBlock, int argc, +// byte[] envBlock, int envc, +// byte[] dir, +// boolean redirectErrorStream, +// FileDescriptor stdin_fd, +// FileDescriptor stdout_fd, +// FileDescriptor stderr_fd) +// JNIEXPORT jint JNICALL +// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, +// jobject process, +// jbyteArray prog, +// jbyteArray argBlock, jint argc, +// jbyteArray envBlock, jint envc, +// jbyteArray dir, +// jboolean redirectErrorStream, +// jobject stdin_fd, +// jobject stdout_fd, +// jobject stderr_fd) +// +// ::c_calling_convention +// 0: // env <-- a0 +// 1: L // klass/obj <-- t0 => a1 +// 2: [ // prog[] <-- a0 => a2 +// 3: [ // argBlock[] <-- a1 => a3 +// 4: I // argc <-- a2 => a4 +// 5: [ // envBlock[] <-- a3 => a5 +// 6: I // envc <-- a4 => a5 +// 7: [ // dir[] <-- a5 => a7 +// 8: Z // redirectErrorStream <-- a6 => sp[0] +// 9: L // stdin fp[16] => sp[8] +// 10: L // stdout fp[24] => sp[16] +// 11: L // stderr fp[32] => sp[24] +// + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// --------------------------------------------------------------------------- +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ swc1(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ sdc1(FSF, FP, -wordSize ); + break; + case T_VOID: break; + case T_LONG: + __ sd(V0, FP, -wordSize); + break; + case T_OBJECT: + case T_ARRAY: + __ sd(V0, FP, -wordSize); + break; + default: { + __ sw(V0, FP, -wordSize); + } + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg 
and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ lwc1(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ ldc1(FSF, FP, -wordSize ); + break; + case T_LONG: + __ ld(V0, FP, -wordSize); + break; + case T_VOID: break; + case T_OBJECT: + case T_ARRAY: + __ ld(V0, FP, -wordSize); + break; + default: { + __ lw(V0, FP, -wordSize); + } + } +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + __ push(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ push(args[i].first()->as_FloatRegister()); + } + } +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + __ pop(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ pop(args[i].first()->as_FloatRegister()); + } + } +} + +// A simple move of integer like type +static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ lw(AT, FP, reg2offset_in(src.first())); + __ sd(AT, SP, reg2offset_out(dst.first())); + } else { + // stack to reg + __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + if (dst.first() != src.first()){ + __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() + } + } +} + +// An oop arg. Must pass a handle not the oop itself +static void object_move(MacroAssembler* masm, + OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset) { + + // must pass a handle. First figure out the location we use as a handle + + //FIXME, for mips, dst can be register + if (src.first()->is_stack()) { + // Oop is already on the stack as an argument + Register rHandle = V0; + Label nil; + __ xorr(rHandle, rHandle, rHandle); + __ ld(AT, FP, reg2offset_in(src.first())); + __ beq(AT, R0, nil); + __ delayed()->nop(); + __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); + __ bind(nil); + if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); + else __ move( (dst.first())->as_Register(), rHandle); + //if dst is register + //FIXME, do mips need out preserve stack slots? 
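+    // Record the caller's argument slot in the OopMap so GC can find (and
+    // update) the oop while we are in the native wrapper; the handle handed to
+    // the callee is the address of that slot, or NULL if the oop itself was
+    // NULL (handled above).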
+ int offset_in_older_frame = src.first()->reg2stack() + + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + } else { + // Oop is in an a register we must store it to the space we reserve + // on the stack for oop_handles + const Register rOop = src.first()->as_Register(); + assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); + const Register rHandle = V0; + //Important: refer to java_calling_convertion + int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot*VMRegImpl::stack_slot_size; + Label skip; + __ sd( rOop , SP, offset ); + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + __ xorr( rHandle, rHandle, rHandle); + __ beq(rOop, R0, skip); + __ delayed()->nop(); + __ lea(rHandle, Address(SP, offset)); + __ bind(skip); + // Store the handle parameter + if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); + else __ move((dst.first())->as_Register(), rHandle); + //if dst is register + + if (is_receiver) { + *receiver_offset = offset; + } + } +} + +// A float arg may have to do float reg int reg conversion +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + __ lw(AT, FP, reg2offset_in(src.first())); + __ sw(AT, SP, reg2offset_out(dst.first())); + } + else + __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); + } else { + // reg to stack + if(dst.first()->is_stack()) + __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + else + __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } +} + +// A long move +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The only legal possibility for a long_move VMRegPair is: + // 1: two stack slots (possibly unaligned) + // as neither the java or C calling convention will use registers + // for longs. + + if (src.first()->is_stack()) { + assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); + if( dst.first()->is_stack()){ + __ ld(AT, FP, reg2offset_in(src.first())); + __ sd(AT, SP, reg2offset_out(dst.first())); + } else { + __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); + } + } else { + if( dst.first()->is_stack()){ + __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); + } else { + __ move( (dst.first())->as_Register() , (src.first())->as_Register()); + } + } +} + +// A double move +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The only legal possibilities for a double_move VMRegPair are: + // The painful thing here is that like long_move a VMRegPair might be + + // Because of the calling convention we know that src is either + // 1: a single physical register (xmm registers only) + // 2: two stack slots (possibly unaligned) + // dst can only be a pair of stack slots. 
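+  // (The "xmm registers only" wording above is inherited from x86; on this
+  // port the register case is a FloatRegister. The four cases handled below
+  // are stack to stack via AT, stack to FPR, FPR to stack, and FPR to FPR.)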
+ + + if (src.first()->is_stack()) { + // source is all stack + if( dst.first()->is_stack()){ + __ ld(AT, FP, reg2offset_in(src.first())); + __ sd(AT, SP, reg2offset_out(dst.first())); + } else { + __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); + } + + } else { + // reg to stack + // No worries about stack alignment + if( dst.first()->is_stack()){ + __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + } + else + __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + + } +} + +static void verify_oop_args(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + Register temp_reg = T9; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (sig_bt[i] == T_OBJECT || + sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = S3; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal(err_msg_res("unexpected intrinsic id %d", iid)); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. + fatal("receiver always in a register"); + receiver_reg = SSR; // known to be free at this point + __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. 
The method takes arguments +// in the Java compiled code convention, marshals them to the native +// convention (handlizes oops, etc), transitions to native, makes the call, +// returns to java state (possibly blocking), unhandlizes any result and +// returns. +nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + methodHandle method, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, + BasicType ret_type) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); + int vep_offset = ((intptr_t)__ pc()) - start; + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + gen_special_dispatch(masm, + method, + in_sig_bt, + in_regs); + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period + __ flush(); + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet*)NULL); + } + bool is_critical_native = true; + address native_func = method->critical_native_function(); + if (native_func == NULL) { + native_func = method->native_function(); + is_critical_native = false; + } + assert(native_func != NULL, "must have function"); + + // Native nmethod wrappers never take possesion of the oop arguments. + // So the caller will gc the arguments. The only thing we need an + // oopMap for is if the call is static + // + // An OopMap for lock (and class if static), and one for the VM call itself + OopMapSet *oop_maps = new OopMapSet(); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. 
To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args; + if (!is_critical_native) { + total_c_args += 1; + if (method->is_static()) { + total_c_args++; + } + } else { + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args++; + } + } + } + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + Symbol* atype = ss.as_symbol(CHECK_NULL); + const char* at = atype->as_C_string(); + if (strlen(at) == 2) { + assert(at[0] == '[', "must be"); + switch (at[1]) { + case 'B': in_elem_bt[i] = T_BYTE; break; + case 'C': in_elem_bt[i] = T_CHAR; break; + case 'D': in_elem_bt[i] = T_DOUBLE; break; + case 'F': in_elem_bt[i] = T_FLOAT; break; + case 'I': in_elem_bt[i] = T_INT; break; + case 'J': in_elem_bt[i] = T_LONG; break; + case 'S': in_elem_bt[i] = T_SHORT; break; + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; + default: ShouldNotReachHere(); + } + } + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + in_elem_bt[i] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type(), "must match"); + ss.next(); + } + } + } + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + // + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Compute framesize for the wrapper. We need to handlize all oops in + // registers. We must create space for them here that is disjoint from + // the windowed save area because we have no control over when we might + // flush the window again and overwrite values that gc has since modified. + // (The live window race) + // + // We always just allocate 6 word for storing down these object. This allow + // us to simply record the base and use the Ireg number to decide which + // slot to use. (Note that the reg number is the inbound number not the + // outbound number). + // We must shuffle args to match the native convention, and include var-args space. + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. 
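+    // Count single- and double-slot register arguments first; the save area
+    // is sized from these counts below (T_LONG, T_DOUBLE and T_ARRAY take two
+    // slots each, narrower values take one).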
+ int double_slots = 0; + int single_slots = 0; + for ( int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: single_slots++; break; + case T_ARRAY: + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + switch (in_sig_bt[i]) { + case T_FLOAT: single_slots++; break; + case T_DOUBLE: double_slots++; break; + default: ShouldNotReachHere(); + } + } + } + total_save_slots = double_slots * 2 + single_slots; + // align the save area + if (double_slots != 0) { + stack_slots = round_to(stack_slots, 2); + } + } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + // Now a place to save return value or as a temporary for any gpr -> fpr moves + // + 2 for return address (which we own) and saved fp + stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | 2 slots for moves | + // |---------------------| + // | lock box (if sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | vararg area | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, StackAlignmentInSlots); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + intptr_t start = (intptr_t)__ pc(); + + + + // First thing make an ic check to see if we should even be here + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + // We are free to use all registers as temps without saving them and + // restoring them except fp. fp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. + + //refer to register_mips.hpp:IC_Klass + const Register ic_reg = T1; + const Register receiver = T0; + + Label hit; + Label exception_pending; + + __ verify_oop(receiver); + //add for compressedoops + __ load_klass(T9, receiver); + __ beq(T9, ic_reg, hit); + __ delayed()->nop(); + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ delayed()->nop(); + __ bind(hit); + + int vep_offset = ((intptr_t)__ pc()) - start; + + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + + // Generate stack overflow check + if (UseStackBanging) { + __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); + } + + // Generate a new frame for the wrapper. + // do mips need this ? 
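+  // SP is re-aligned to StackAlignmentInBytes before enter(); enter() pushes
+  // RA and FP, and the remaining stack_size - 2*wordSize bytes are claimed
+  // right after it, giving the frame size computed above.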
+#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ enter(); + // -2 because return address is already present and so is saved fp + __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + + // Calculate the difference between sp and fp. We need to know it + // after the native call because on windows Java Natives will pop + // the arguments and it is painful to do sp relative addressing + // in a platform independent way. So after the call we switch to + // fp relative addressing. + //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change + //the SP + int fp_adjustment = stack_size - 2*wordSize; + +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // Compute the fp offset for any slots used after the jni call + + int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; + // We use TREG as a thread pointer because it is callee save and + // if we load it once it is usable thru the entire wrapper + const Register thread = TREG; + + // We use S4 as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + const Register oop_handle_reg = S4; + if (is_critical_native) { + Unimplemented(); + // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, + // oop_handle_offset, oop_maps, in_regs, in_sig_bt); + } + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmpi, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + // + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* + // and, if static, the class mirror instead of a receiver. This pretty much + // guarantees that register layout will not match (and mips doesn't use reg + // parms though amd does). Since the native abi doesn't use register args + // and the java conventions does we don't have to worry about collisions. + // All of our moved are reg->stack or stack->stack. + // We ignore the extra arguments during the shuffle and handle them at the + // last moment. The shuffle is described by the two calling convention + // vectors we have in our possession. We simply walk the java vector to + // get the source locations and the c vector to get the destinations. + + int c_arg = method->is_static() ? 2 : 1 ; + + // Record sp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. 
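+  // Doubling matters because object_move() records incoming stack oops at
+  // offset_in_older_frame + framesize_in_slots, which can exceed the slot
+  // count of this frame alone.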
+ // + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + + // Mark location of fp (someday) + // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + // This may iterate in two different directions depending on the + // kind of native it is. The reason is that for regular JNI natives + // the incoming and outgoing registers are offset upwards and for + // critical natives they are offset down. + GrowableArray arg_order(2 * total_in_args); + VMRegPair tmp_vmreg; + tmp_vmreg.set2(T8->as_VMReg()); + + if (!is_critical_native) { + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); + } + } else { + // Compute a valid move order, using tmp_vmreg to break any cycles + Unimplemented(); + // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); + } + + int temploc = -1; + for (int ai = 0; ai < arg_order.length(); ai += 2) { + int i = arg_order.at(ai); + int c_arg = arg_order.at(ai + 1); + __ block_comment(err_msg("move %d -> %d", i, c_arg)); + if (c_arg == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // This arg needs to be moved to a temporary + __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); + in_regs[i] = tmp_vmreg; + temploc = i; + continue; + } else if (i == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // Read from the temporary location + assert(temploc != -1, "must be valid"); + i = temploc; + temploc = -1; + } +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); + } else if (in_regs[i].first()->is_FloatRegister()) { + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); + } + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif /* ASSERT */ + switch (in_sig_bt[i]) { + case T_ARRAY: + if (is_critical_native) { + Unimplemented(); + // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); + c_arg++; +#ifdef ASSERT + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif + break; + } + case T_OBJECT: + assert(!is_critical_native, "no oop arguments"); + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], + ((i == 0) && (!is_static)), + &receiver_offset); + break; + case T_VOID: + break; + + case T_FLOAT: + float_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_DOUBLE: + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + 
double_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_LONG : + long_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + simple_move32(masm, in_regs[i], out_regs[c_arg]); + } + } + + // point c_arg at the first arg that is already loaded in case we + // need to spill before we call out + c_arg = total_c_args - total_in_args; + // Pre-load a static method's oop. Used both by locking code and + // the normal JNI call code. + + __ move(oop_handle_reg, A1); + + if (method->is_static() && !is_critical_native) { + + // load opp into a register + int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( + (method->method_holder())->java_mirror())); + + + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); + // Now handlize the static class mirror it's known not-null. + __ sd( oop_handle_reg, SP, klass_offset); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(oop_handle_reg, Address(SP, klass_offset)); + // store the klass handle as second argument + __ move(A1, oop_handle_reg); + // and protect the arg if we must spill + c_arg--; + } + + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + // We use the same pc/oopMap repeatedly when we call out + + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + + __ set_last_Java_frame(SP, noreg, NULL); + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)the_pc ; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + + // We have all of the arguments setup at this point. We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? 
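+  // DTrace method-entry probe: the argument registers are spilled with
+  // save_args()/restore_args() around the call_VM_leaf below precisely
+  // because of the constraint described above.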
+ { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + save_args(masm, total_c_args, c_arg, out_regs); + int metadata_index = __ oop_recorder()->find_index(method()); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_set48(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + thread, AT); + + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // These are register definitions we need for locking/unlocking + const Register swap_reg = T8; // Must use T8 for cmpxchg instruction + const Register obj_reg = T9; // Will contain the oop + //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) + const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) + + + + Label slow_path_lock; + Label lock_done; + + // Lock a synchronized method + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ move(oop_handle_reg, A1); + + // Get address of the box + __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); + + // Load the oop from the handle + __ ld(obj_reg, oop_handle_reg, 0); + + if (UseBiasedLocking) { + // Note that oop_handle_reg is trashed during this call + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); + } + + // Load immediate 1 into swap_reg %T8 + __ move(swap_reg, 1); + + __ ld(AT, obj_reg, 0); + __ orr(swap_reg, swap_reg, AT); + + __ sd( swap_reg, lock_reg, mark_word_offset); + __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); + __ bne(AT, R0, lock_done); + __ delayed()->nop(); + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg + + __ dsubu(swap_reg, swap_reg, SP); + __ move(AT, 3 - os::vm_page_size()); + __ andr(swap_reg , swap_reg, AT); + // Save the test result, for recursive case, the result is zero + __ sd(swap_reg, lock_reg, mark_word_offset); + __ bne(swap_reg, R0, slow_path_lock); + __ delayed()->nop(); + // Slow path will re-enter here + __ bind(lock_done); + + if (UseBiasedLocking) { + // Re-fetch oop_handle_reg as we trashed it above + __ move(A1, oop_handle_reg); + } + } + + + // Finally just about ready to make the JNI call + + + // get JNIEnv* which is first argument to native + if (!is_critical_native) { + __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); + } + + // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) + // Load the second arguments into A1 + //__ ld(A1, SP , wordSize ); // klass + + // Now set thread in native + __ addiu(AT, R0, _thread_in_native); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + // do the call + __ call(native_func, relocInfo::runtime_call_type); + __ delayed()->nop(); + // WARNING - on Windows Java Natives use pascal calling convention and pop the + // arguments off of the stack. 
We could just re-adjust the stack pointer here + // and continue to do SP relative addressing but we instead switch to FP + // relative addressing. + + // Unpack native results. + switch (ret_type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ andi(V0, V0, 0xFFFF); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : // nothing to do break; + case T_DOUBLE : + case T_FLOAT : + // Result is in st0 we'll save as needed + break; + case T_ARRAY: // Really a handle + case T_OBJECT: // Really a handle + break; // can't de-handlize until after safepoint check + case T_VOID: break; + case T_LONG: break; + default : ShouldNotReachHere(); + } + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. + __ addiu(AT, R0, _thread_in_native_trans); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ sync(); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. + __ serialize_memory(thread, A0); + } + } + + Label after_transition; + + // check for safepoint operation in progress and/or pending suspend requests + { + Label Continue; + __ li(AT, SafepointSynchronize::address_of_state()); + __ lw(A0, AT, 0); + __ addiu(AT, A0, -SafepointSynchronize::_not_synchronized); + Label L; + __ bne(AT, R0, L); + __ delayed()->nop(); + __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ delayed()->nop(); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // + save_native_result(masm, ret_type, stack_slots); + __ move(A0, thread); + __ addiu(SP, SP, -wordSize); + __ push(S2); + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + if (!is_critical_native) { + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); + __ delayed()->nop(); + } else { + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); + __ delayed()->nop(); + } + __ move(SP, S2); // use S2 as a sender SP holder + __ pop(S2); + __ addiu(SP, SP, wordSize); + //add for compressedoops + __ reinit_heapbase(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + + if (is_critical_native) { + // The call above performed the transition to thread_in_Java so + // skip the transition logic below. 
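+      // beq(R0, R0, ...) is the unconditional-branch idiom used in this file.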
+ __ beq(R0, R0, after_transition); + __ delayed()->nop(); + } + + __ bind(Continue); + } + + // change thread state + __ addiu(AT, R0, _thread_in_Java); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + __ bind(after_transition); + Label reguard; + Label reguard_done; + __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ addiu(AT, AT, -JavaThread::stack_guard_yellow_disabled); + __ beq(AT, R0, reguard); + __ delayed()->nop(); + // slow path reguard re-enters here + __ bind(reguard_done); + + // Handle possible exception (will unlock if necessary) + + // native result if any is live + + // Unlock + Label slow_path_unlock; + Label unlock_done; + if (method->is_synchronized()) { + + Label done; + + // Get locked oop from the handle we passed to jni + __ ld( obj_reg, oop_handle_reg, 0); + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, T8, done); + + } + + // Simple recursive lock? + + __ ld(AT, FP, lock_slot_fp_offset); + __ beq(AT, R0, done); + __ delayed()->nop(); + // Must save FSF if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + // get old displaced header + __ ld (T8, FP, lock_slot_fp_offset); + // get address of the stack lock + __ addiu(c_rarg0, FP, lock_slot_fp_offset); + // Atomic swap old header if oop still contains the stack lock + __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0); + + __ beq(AT, R0, slow_path_unlock); + __ delayed()->nop(); + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + + } + { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + // Tell dtrace about this method exit + save_native_result(masm, ret_type, stack_slots); + int metadata_index = __ oop_recorder()->find_index( (method())); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_set48(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + thread, AT); + restore_native_result(masm, ret_type, stack_slots); + } + + // We can finally stop using that last_Java_frame we setup ages ago + + __ reset_last_Java_frame(false); + + // Unpack oop result, e.g. JNIHandles::resolve value. + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + __ resolve_jobject(V0, thread, T9); + } + + if (!is_critical_native) { + // reset handle block + __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); + __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); + } + + if (!is_critical_native) { + // Any exception pending? + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, exception_pending); + __ delayed()->nop(); + } + // no exception, we're almost done + + // check that only result value is on FPU stack + __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); + + // Return +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ leave(); + + __ jr(RA); + __ delayed()->nop(); + // Unexpected paths are out of line and go here + // Slow path locking & unlocking + if (method->is_synchronized()) { + + // BEGIN Slow path lock + __ bind(slow_path_lock); + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + __ move(A0, obj_reg); + __ move(A1, lock_reg); + __ move(A2, thread); + __ addiu(SP, SP, - 3*wordSize); + + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ move(SP, S2); + __ addiu(SP, SP, 3*wordSize); + + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ b(lock_done); + __ delayed()->nop(); + // END Slow path lock + + // BEGIN Slow path unlock + __ bind(slow_path_unlock); + + // Slow path unlock + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ push(AT); + __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); + + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + // should be a peal + // +wordSize because of the push above + __ addiu(A1, FP, lock_slot_fp_offset); + + __ move(A0, obj_reg); + __ addiu(SP,SP, -2*wordSize); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), + relocInfo::runtime_call_type); + __ delayed()->nop(); + __ addiu(SP, SP, 2*wordSize); + __ move(SP, S2); + //add for compressedoops + __ reinit_heapbase(); +#ifdef ASSERT + { + Label L; + __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif /* ASSERT */ + + __ pop(AT); + __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); + } + __ b(unlock_done); + __ delayed()->nop(); + // END Slow path unlock + + } + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), + relocInfo::runtime_call_type); + __ delayed()->nop(); + //add for compressedoops + __ reinit_heapbase(); + restore_native_result(masm, ret_type, stack_slots); + __ b(reguard_done); + __ delayed()->nop(); + + // BEGIN EXCEPTION PROCESSING + if (!is_critical_native) { + // Forward the exception + __ bind(exception_pending); + + // remove possible return value from FPU register stack + __ empty_FPU_stack(); + + // 
pop our frame + //forward_exception_entry need return address on stack + __ move(SP, FP); + __ pop(FP); + + // and forward the exception + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + } + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + + if (is_critical_native) { + nm->set_lazy_critical_native(true); + } + + return nm; + +} + +#ifdef HAVE_DTRACE_H +// --------------------------------------------------------------------------- +// Generate a dtrace nmethod for a given signature. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// abi and then leaves nops at the position you would expect to call a native +// function. When the probe is enabled the nops are replaced with a trap +// instruction that dtrace inserts and the trace will cause a notification +// to dtrace. +// +// The probes are only able to take primitive types and java/lang/String as +// arguments. No other java types are allowed. Strings are converted to utf8 +// strings so that from dtrace point of view java strings are converted to C +// strings. There is an arbitrary fixed limit on the total space that a method +// can use for converting the strings. (256 chars per string in the signature). +// So any java string larger then this is truncated. + +static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; +static bool offsets_initialized = false; + +static VMRegPair reg64_to_VMRegPair(Register r) { + VMRegPair ret; + if (wordSize == 8) { + ret.set2(r->as_VMReg()); + } else { + ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); + } + return ret; +} + + +nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, + methodHandle method) { + + + // generate_dtrace_nmethod is guarded by a mutex so we are sure to + // be single threaded in this method. + assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); + + // Fill in the signature array, for the calling-convention call. + int total_args_passed = method->size_of_parameters(); + + BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); + VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); + + // The signature we are going to use for the trap that dtrace will see + // java/lang/String is converted. We drop "this" and any other object + // is converted to NULL. (A one-slot java/lang/Long object reference + // is converted to a two-slot long, which is why we double the allocation). 
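+  // For example, a static method taking (String, long, float) would yield an
+  // out signature of { T_ADDRESS, T_LONG, T_VOID, T_INT }: the String becomes
+  // a C (utf8) string pointer, the long keeps its T_VOID filler slot, and the
+  // float is passed as an int.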
+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); + + int i=0; + int total_strings = 0; + int first_arg_to_pass = 0; + int total_c_args = 0; + + // Skip the receiver as dtrace doesn't want to see it + if( !method->is_static() ) { + in_sig_bt[i++] = T_OBJECT; + first_arg_to_pass = 1; + } + + SignatureStream ss(method->signature()); + for ( ; !ss.at_return_type(); ss.next()) { + BasicType bt = ss.type(); + in_sig_bt[i++] = bt; // Collect remaining bits of signature + out_sig_bt[total_c_args++] = bt; + if( bt == T_OBJECT) { + symbolOop s = ss.as_symbol_or_null(); + if (s == vmSymbols::java_lang_String()) { + total_strings++; + out_sig_bt[total_c_args-1] = T_ADDRESS; + } else if (s == vmSymbols::java_lang_Boolean() || + s == vmSymbols::java_lang_Byte()) { + out_sig_bt[total_c_args-1] = T_BYTE; + } else if (s == vmSymbols::java_lang_Character() || + s == vmSymbols::java_lang_Short()) { + out_sig_bt[total_c_args-1] = T_SHORT; + } else if (s == vmSymbols::java_lang_Integer() || + s == vmSymbols::java_lang_Float()) { + out_sig_bt[total_c_args-1] = T_INT; + } else if (s == vmSymbols::java_lang_Long() || + s == vmSymbols::java_lang_Double()) { + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } + } else if ( bt == T_LONG || bt == T_DOUBLE ) { + in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots + // We convert double to long + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } else if ( bt == T_FLOAT) { + // We convert float to int + out_sig_bt[total_c_args-1] = T_INT; + } + } + + assert(i==total_args_passed, "validly parsed signature"); + + // Now get the compiled-Java layout as input arguments + int comp_args_on_stack; + comp_args_on_stack = SharedRuntime::java_calling_convention( + in_sig_bt, in_regs, total_args_passed, false); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the a native (non-jni) function would expect them. To figure out + // where they go we convert the java signature to a C signature and remove + // T_VOID for any long/double we might have received. + + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Plus a temp for possible converion of float/double/long register args + + int conversion_temp = stack_slots; + stack_slots += 2; + + + // Now space for the string(s) we must convert + + int string_locs = stack_slots; + stack_slots += total_strings * + (max_dtrace_string_size / VMRegImpl::stack_slot_size); + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | string[n] | + // |---------------------| <- string_locs[n] + // | string[n-1] | + // |---------------------| <- string_locs[n-1] + // | ... | + // | ... 
| + // |---------------------| <- string_locs[1] + // | string[0] | + // |---------------------| <- string_locs[0] + // | temp | + // |---------------------| <- conversion_temp + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + intptr_t start = (intptr_t)__ pc(); + + // First thing make an ic check to see if we should even be here + + { + Label L; + const Register temp_reg = G3_scratch; + Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); + __ verify_oop(O0); + __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); + __ cmp(temp_reg, G5_inline_cache_reg); + __ brx(Assembler::equal, true, Assembler::pt, L); + __ delayed()->nop(); + + __ jump_to(ic_miss, 0); + __ delayed()->nop(); + __ align(CodeEntryAlignment); + __ bind(L); + } + + int vep_offset = ((intptr_t)__ pc()) - start; + + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + + // Generate stack overflow check before creating frame + __ generate_stack_overflow_check(stack_size); + + // Generate a new frame for the wrapper. + __ save(SP, -stack_size, SP); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + VMRegPair zero; + const Register g0 = G0; // without this we get a compiler warning (why??) 
+ zero.set2(g0->as_VMReg()); + + int c_arg, j_arg; + + Register conversion_off = noreg; + + for (j_arg = first_arg_to_pass, c_arg = 0 ; + j_arg < total_args_passed ; j_arg++, c_arg++ ) { + + VMRegPair src = in_regs[j_arg]; + VMRegPair dst = out_regs[c_arg]; + +#ifdef ASSERT + if (src.first()->is_Register()) { + assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); + } else if (src.first()->is_FloatRegister()) { + assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)], "ack!"); + } + if (dst.first()->is_Register()) { + reg_destroyed[dst.first()->as_Register()->encoding()] = true; + } else if (dst.first()->is_FloatRegister()) { + freg_destroyed[dst.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)] = true; + } +#endif /* ASSERT */ + + switch (in_sig_bt[j_arg]) { + case T_ARRAY: + case T_OBJECT: + { + if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || + out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { + // need to unbox a one-slot value + Register in_reg = L0; + Register tmp = L2; + if ( src.first()->is_reg() ) { + in_reg = src.first()->as_Register(); + } else { + assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); + } + // If the final destination is an acceptable register + if ( dst.first()->is_reg() ) { + if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { + tmp = dst.first()->as_Register(); + } + } + + Label skipUnbox; + if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { + __ mov(G0, tmp->successor()); + } + __ br_null(in_reg, true, Assembler::pn, skipUnbox); + __ delayed()->mov(G0, tmp); + + BasicType bt = out_sig_bt[c_arg]; + int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); + switch (bt) { + case T_BYTE: + __ ldub(in_reg, box_offset, tmp); break; + case T_SHORT: + __ lduh(in_reg, box_offset, tmp); break; + case T_INT: + __ ld(in_reg, box_offset, tmp); break; + case T_LONG: + __ ld_long(in_reg, box_offset, tmp); break; + default: ShouldNotReachHere(); + } + + __ bind(skipUnbox); + // If tmp wasn't final destination copy to final destination + if (tmp == L2) { + VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); + if (out_sig_bt[c_arg] == T_LONG) { + long_move(masm, tmp_as_VM, dst); + } else { + move32_64(masm, tmp_as_VM, out_regs[c_arg]); + } + } + if (out_sig_bt[c_arg] == T_LONG) { + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); + ++c_arg; // move over the T_VOID to keep the loop indices in sync + } + } else if (out_sig_bt[c_arg] == T_ADDRESS) { + Register s = + src.first()->is_reg() ? src.first()->as_Register() : L2; + Register d = + dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // We store the oop now so that the conversion pass can reach + // while in the inner frame. This will be the only store if + // the oop is NULL. 
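+          // s/d fall back to the temporary L2 when the source or destination
+          // lives on the stack rather than in a register.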
+ if (s != L2) { + // src is register + if (d != L2) { + // dst is register + __ mov(s, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } else { + // src not a register + assert(Assembler::is_simm13(reg2offset(src.first()) + + STACK_BIAS), "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); + if (d == L2) { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } else if (out_sig_bt[c_arg] != T_VOID) { + // Convert the arg to NULL + if (dst.first()->is_reg()) { + __ mov(G0, dst.first()->as_Register()); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } + break; + case T_VOID: + break; + + case T_FLOAT: + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + move32_64(masm, src, dst); + } else { + if (dst.first()->is_reg()) { + // freg -> reg + int off = + STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + Register d = dst.first()->as_Register(); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + __ ld(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld(SP, conversion_off , d); + } + } else { + // freg -> mem + int off = STACK_BIAS + reg2offset(dst.first()); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + } + } + } + break; + + case T_DOUBLE: + assert( j_arg + 1 < total_args_passed && + in_sig_bt[j_arg + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + long_move(masm, src, dst); + } else { + Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // Destination could be an odd reg on 32bit in which case + // we can't load direct to the destination. 
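+          // (Presumably because the 64-bit ld_long below needs an even
+          // register pair in 32-bit mode; L2 is used as a detour otherwise.)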
+ + if (!d->is_even() && wordSize == 4) { + d = L2; + } + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, off); + __ ld_long(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld_long(SP, conversion_off, d); + } + if (d == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } + break; + + case T_LONG : + // 32bit can't do a split move of something like g1 -> O0, O1 + // so use a memory temp + if (src.is_single_phys_reg() && wordSize == 4) { + Register tmp = L2; + if (dst.first()->is_reg() && + (wordSize == 8 || dst.first()->as_Register()->is_even())) { + tmp = dst.first()->as_Register(); + } + + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stx(src.first()->as_Register(), SP, off); + __ ld_long(SP, off, tmp); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stx(src.first()->as_Register(), SP, conversion_off); + __ ld_long(SP, conversion_off, tmp); + } + + if (tmp == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } else { + long_move(masm, src, dst); + } + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + move32_64(masm, src, dst); + } + } + + + // If we have any strings we must store any register based arg to the stack + // This includes any still live xmm registers too. + + if (total_strings > 0 ) { + + // protect all the arg registers + __ save_frame(0); + __ mov(G2_thread, L7_thread_cache); + const Register L2_string_off = L2; + + // Get first string offset + __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); + + for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { + if (out_sig_bt[c_arg] == T_ADDRESS) { + + VMRegPair dst = out_regs[c_arg]; + const Register d = dst.first()->is_reg() ? + dst.first()->as_Register()->after_save() : noreg; + + // It's a string the oop and it was already copied to the out arg + // position + if (d != noreg) { + __ mov(d, O0); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); + } + Label skip; + + __ br_null(O0, false, Assembler::pn, skip); + __ delayed()->addu(FP, L2_string_off, O1); + + if (d != noreg) { + __ mov(O1, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); + } + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), + relocInfo::runtime_call_type); + __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); + + __ bind(skip); + + } + + } + __ mov(L7_thread_cache, G2_thread); + __ restore(); + + } + + + // Ok now we are done. 
Need to place the nop that dtrace wants in order to + // patch in the trap + + int patch_offset = ((intptr_t)__ pc()) - start; + + __ nop(); + + + // Return + + __ ret(); + __ delayed()->restore(); + + __ flush(); + + nmethod *nm = nmethod::new_dtrace_nmethod( + method, masm->code(), vep_offset, patch_offset, frame_complete, + stack_slots / VMRegImpl::slots_per_word); + return nm; + +} + +#endif // HAVE_DTRACE_H + +// this function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + return (callee_locals - callee_parameters) * Interpreter::stackElementWords; +} + +// "Top of Stack" slots that may be unused by the calling convention but must +// otherwise be preserved. +// On Intel these are not necessary and the value can be zero. +// On Sparc this describes the words reserved for storing a register window +// when an interrupt occurs. +uint SharedRuntime::out_preserve_stack_slots() { + return 0; +} + +//------------------------------generate_deopt_blob---------------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. +void SharedRuntime::generate_deopt_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + //CodeBuffer buffer ("deopt_blob", 4000, 2048); + CodeBuffer buffer ("deopt_blob", 8000, 2048); + MacroAssembler* masm = new MacroAssembler( & buffer); + int frame_size_in_words; + OopMap* map = NULL; + // Account for the extra args we place on the stack + // by the time we call fetch_unroll_info + const int additional_words = 2; // deopt kind, thread + + OopMapSet *oop_maps = new OopMapSet(); + + address start = __ pc(); + Label cont; + // we use S3 for DeOpt reason register + Register reason = S3; + // use S6 for thread register + Register thread = TREG; + // use S7 for fetch_unroll_info returned UnrollBlock + Register unroll = S7; + // Prolog for non exception case! + // Correct the return address we were given. + //FIXME, return address is on the tos or Ra? + __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); + // Save everything in sight. + map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); + // Normal deoptimization + __ move(reason, Deoptimization::Unpack_deopt); + __ b(cont); + __ delayed()->nop(); + + int reexecute_offset = __ pc() - start; + + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); + __ move(reason, Deoptimization::Unpack_reexecute); + __ b(cont); + __ delayed()->nop(); + + int exception_offset = __ pc() - start; + // Prolog for exception case + + // all registers are dead at this entry point, except for V0 and + // V1 which contain the exception oop and exception pc + // respectively. Set them in TLS and fall thru to the + // unpack_with_exception_in_tls entry point. 
+ + __ get_thread(thread); + __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + int exception_in_tls_offset = __ pc() - start; + // new implementation because exception oop is now passed in JavaThread + + // Prolog for exception case + // All registers must be preserved because they might be used by LinearScan + // Exceptiop oop and throwing PC are passed in JavaThread + // tos: stack at point of call to method that threw the exception (i.e. only + // args are on the stack, no return address) + + // Return address will be patched later with the throwing pc. The correct value is not + // available now because loading it from memory would destroy registers. + // Save everything in sight. + // No need to update map as each call to save_live_registers will produce identical oopmap + __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); + (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); + + // Now it is safe to overwrite any register + // store the correct deoptimization type + __ move(reason, Deoptimization::Unpack_exception); + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread + __ get_thread(thread); + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + + +#ifdef ASSERT + // verify that there is really an exception oop in JavaThread + __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); + __ verify_oop(AT); + // verify that there is no pending exception + Label no_pending_exception; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, no_pending_exception); + __ delayed()->nop(); + __ stop("must not have pending exception here"); + __ bind(no_pending_exception); +#endif + __ bind(cont); + // Compiled code leaves the floating point stack dirty, empty it. + __ empty_FPU_stack(); + + + // Call C code. Need thread and this frame, but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ move(A0, thread); + __ addiu(SP, SP, -additional_words * wordSize); + + __ set_last_Java_frame(NOREG, NOREG, NULL); + + // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. Call should capture return values. 
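+  // The pc stored into the frame anchor below is precomputed as the current
+  // pc plus NativeMovConstReg::instruction_size plus 28 bytes, which appears
+  // to cover the sd/call/nop sequence, so that it matches the pc at which the
+  // oop map is registered after the call.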
+ + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + __ call((address)Deoptimization::fetch_unroll_info); + //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); + __ delayed()->nop(); + oop_maps->add_gc_map(__ pc() - start, map); + __ addiu(SP, SP, additional_words * wordSize); + __ get_thread(thread); + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + __ move(unroll, V0); + + + // Move the unpack kind to a safe place in the UnrollBlock because + // we are very short of registers + + Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); + __ sw(reason, unpack_kind); + // save the unpack_kind value + // Retrieve the possible live values (return values) + // All callee save registers representing jvm state + // are now in the vframeArray. + + Label noException; + __ move(AT, Deoptimization::Unpack_exception); + __ bne(AT, reason, noException);// Was exception pending? + __ delayed()->nop(); + __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); + + __ verify_oop(V0); + + // Overwrite the result registers with the exception results. + __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); + __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); + + __ bind(noException); + + + // Stack is back to only having register save data on the stack. + // Now restore the result registers. Everything else is either dead or captured + // in the vframeArray. + + RegisterSaver::restore_result_registers(masm); + // All of the register save area has been popped of the stack. Only the + // return address remains. + // Pop all the frames we must move/replace. + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). + // + // Note: by leaving the return address of self-frame on the stack + // and using the size of frame 2 to adjust the stack + // when we are done the return to frame 3 will still be on the stack. 
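Before the register assignments below, the stack surgery done by the frame-push loop can be summarized in plain C++. The sketch uses a hypothetical Unroll struct in place of Deoptimization::UnrollBlock and omits the interpreter-frame bookkeeping (the last_sp/sender_sp stores) that the generated code performs; it is an editor's illustration, not code from the patch:

    #include <cstdint>

    // Hypothetical stand-in for Deoptimization::UnrollBlock (field names simplified).
    struct Unroll {
      int             size_of_deoptimized_frame;  // bytes
      int             caller_adjustment;          // bytes of extra locals for the caller
      int             number_of_frames;           // skeletal interpreter frames to push
      const int*      frame_sizes;                // bytes, one per skeletal frame
      const intptr_t* frame_pcs;                  // number_of_frames + 1 return pcs
    };

    // Conceptual equivalent of the loop emitted below (wordSize == 8 on MIPS64).
    inline intptr_t* unroll_frames(intptr_t* sp, intptr_t*& fp, const Unroll& u) {
      const int wordSize = 8;
      sp = (intptr_t*)((char*)sp + u.size_of_deoptimized_frame);  // pop the deoptimized frame
      sp += 1;                                                    // trash its return pc
      sp = (intptr_t*)((char*)sp - u.caller_adjustment);          // make room for extra locals
      for (int i = 0; i < u.number_of_frames; i++) {
        *--sp = u.frame_pcs[i];                                   // push return pc
        *--sp = (intptr_t)fp;                                     // push saved fp
        fp = sp;                                                  // new frame link
        sp = (intptr_t*)((char*)sp - (u.frame_sizes[i] - 2 * wordSize));
      }
      *--sp = u.frame_pcs[u.number_of_frames];                    // Interpreter::deopt_entry pc
      *--sp = (intptr_t)fp;                                       // re-push self-frame
      fp = sp;
      return sp;
    }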
+ + // register for the sender's sp + Register sender_sp = Rsender; + // register for frame pcs + Register pcs = T0; + // register for frame sizes + Register sizes = T1; + // register for frame count + Register count = T3; + + // Pop deoptimized frame + __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ addu(SP, SP, AT); + // sp should be pointing at the return address to the caller (3) + + // Load array of frame pcs into pcs + __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + __ addiu(SP, SP, wordSize); // trash the old pc + // Load array of frame sizes into T6 + __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + + + + // Load count of frams into T3 + __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + // Pick up the initial fp we should save + __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + __ move(sender_sp, SP); + __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ subu(SP, SP, AT); + + // Push interpreter frames in a loop + // + //Loop: + // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld + // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] + // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 + // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 + // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp + // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at + // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp + // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 + // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); + // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- + // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 + // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 + // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 + // + // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split + Label loop; + __ bind(loop); + __ ld(T2, sizes, 0); // Load frame size + __ ld_ptr(AT, pcs, 0); // save return address + __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ subu(SP, SP, T2); // Prolog! 
+ // This value is corrected by layout_activation_impl + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ addiu(count, count, -1); // decrement counter + __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ bne(count, R0, loop); + __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); + // Re-push self-frame + __ push2(AT, FP); + __ move(FP, SP); + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); + + // Restore frame locals after moving the frame + __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); + __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); + __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local + __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); + + + // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. + __ move(A1, reason); // exec_mode + __ get_thread(thread); + __ move(A0, thread); // thread + __ addiu(SP, SP, (-additional_words) *wordSize); + + // set last_Java_sp, last_Java_fp + __ set_last_Java_frame(NOREG, FP, NULL); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); + __ delayed()->nop(); + // Revert SP alignment after call since we're going to do some SP relative addressing below + __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // Set an oopmap for the call site + oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); + + __ push(V0); + + __ get_thread(thread); + __ reset_last_Java_frame(true); + + // Collect return values + __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); + __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); + __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local + __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); + //FIXME, + // Clear floating point stack before returning to interpreter + __ empty_FPU_stack(); + //FIXME, we should consider about float and double + // Push a float or double return value if necessary. + __ leave(); + + // Jump to interpreter + __ jr(RA); + __ delayed()->nop(); + + masm->flush(); + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +} + +#ifdef COMPILER2 + +//------------------------------generate_uncommon_trap_blob-------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. 
+void SharedRuntime::generate_uncommon_trap_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); + MacroAssembler* masm = new MacroAssembler(&buffer); + + enum frame_layout { + fp_off, fp_off2, + return_off, return_off2, + framesize + }; + assert(framesize % 4 == 0, "sp not 16-byte aligned"); + + address start = __ pc(); + + // Push self-frame. + __ daddiu(SP, SP, -framesize * BytesPerInt); + + __ sd(RA, SP, return_off * BytesPerInt); + __ sd(FP, SP, fp_off * BytesPerInt); + + __ daddiu(FP, SP, fp_off * BytesPerInt); + + // Clear the floating point exception stack + __ empty_FPU_stack(); + + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // set last_Java_sp + __ set_last_Java_frame(NOREG, FP, NULL); + __ relocate(relocInfo::internal_pc_type); + { + long save_pc = (long)__ pc() + 52; + __ patchable_set48(AT, (long)save_pc); + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + } + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // capture callee-saved registers as well as return values. + __ move(A0, thread); + // argument already in T0 + __ move(A1, T0); + __ patchable_call((address)Deoptimization::uncommon_trap); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap( framesize, 0 ); + + //oop_maps->add_gc_map( __ offset(), true, map); + oop_maps->add_gc_map( __ offset(), map); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + Register unroll = S7; + __ move(unroll, V0); + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: possible-i2c-adapter-frame + // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an + // and c2i here) + + __ daddiu(SP, SP, framesize * BytesPerInt); + + // Pop deoptimized frame + __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ daddu(SP, SP, AT); + + // register for frame pcs + Register pcs = T8; + // register for frame sizes + Register sizes = T9; + // register for frame count + Register count = T3; + // register for the sender's sp + Register sender_sp = T1; + + // sp should be pointing at the return address to the caller (4) + // Load array of frame pcs + __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + + // Load array of frame sizes + __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + + // Pick up the initial fp we should save + __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. 
+ + __ move(sender_sp, SP); + __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ dsubu(SP, SP, AT); + // Push interpreter frames in a loop + Label loop; + __ bind(loop); + __ ld(T2, sizes, 0); // Load frame size + __ ld(AT, pcs, 0); // save return address + __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ dsubu(SP, SP, T2); // Prolog! + // This value is corrected by layout_activation_impl + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ daddiu(count, count, -1); // decrement counter + __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ bne(count, R0, loop); + __ delayed()->nop(); // Bump array pointer (pcs) + + __ ld(RA, pcs, 0); + + // Re-push self-frame + // save old & set new FP + // save final return address + __ enter(); + + // Use FP because the frames look interpreted now + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + address the_pc = __ pc(); + __ set_last_Java_frame(NOREG, FP, the_pc); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + __ move(A0, thread); + __ move(A1, Deoptimization::Unpack_uncommon_trap); + __ patchable_call((address)Deoptimization::unpack_frames); + // Set an oopmap for the call site + oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); + + __ reset_last_Java_frame(true); + + // Pop self-frame. + __ leave(); // Epilog! + + // Jump to interpreter + __ jr(RA); + __ delayed()->nop(); + // ------------- + // make sure all code is generated + masm->flush(); + + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); +} + +#endif // COMPILER2 + +//------------------------------generate_handler_blob------------------- +// +// Generate a special Compile2Runtime blob that saves all registers, and sets +// up an OopMap and calls safepoint code to stop the compiled code for +// a safepoint. +// +// This blob is jumped to (via a breakpoint and the signal handler) from a +// safepoint in compiled code. + +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { + + // Account for thread arg in our frame + const int additional_words = 0; + int frame_size_in_words; + + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // allocate space for the code + // setup code generation tools + CodeBuffer buffer ("handler_blob", 2048, 512); + MacroAssembler* masm = new MacroAssembler( &buffer); + + const Register thread = TREG; + address start = __ pc(); + address call_pc = NULL; + bool cause_return = (pool_type == POLL_AT_RETURN); + bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); + + // If cause_return is true we are at a poll_return and there is + // the return address in RA to the caller on the nmethod + // that is safepoint. 
We can leave this return in RA and + // effectively complete the return and safepoint in the caller. + // Otherwise we load exception pc to RA. + __ push(thread); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + if(!cause_return) { + __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset())); + } + + __ pop(thread); + map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work outselvs. + + __ move(A0, thread); + __ set_last_Java_frame(NOREG, NOREG, NULL); + + + // Do the call + __ call(call_ptr); + __ delayed()->nop(); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + oop_maps->add_gc_map(__ offset(), map); + + Label noException; + + // Clear last_Java_sp again + __ reset_last_Java_frame(false); + + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, noException); + __ delayed()->nop(); + + // Exception pending + + RegisterSaver::restore_live_registers(masm, save_vectors); + //forward_exception_entry need return address on the stack + __ push(RA); + __ patchable_jump((address)StubRoutines::forward_exception_entry()); + + // No exception case + __ bind(noException); + // Normal exit, register restoring and exit + RegisterSaver::restore_live_registers(masm, save_vectors); + __ jr(RA); + __ delayed()->nop(); + + masm->flush(); + + // Fill-out other meta info + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); +} + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss +// +// Generate a stub that calls into vm to find out the proper destination +// of a java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + + //CodeBuffer buffer(name, 1000, 512); + CodeBuffer buffer(name, 2000, 2048); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int frame_size_words; + //we put the thread in A0 + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + int start = __ offset(); + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); + + + int frame_complete = __ offset(); + + const Register thread = T8; + __ get_thread(thread); + + __ move(A0, thread); + __ set_last_Java_frame(noreg, FP, NULL); + //align the stack before invoke native + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); + + __ call(destination); + __ delayed()->nop(); + + // Set an oopmap for the call site. 
+ // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. + oop_maps->add_gc_map( __ offset() - start, map); + // V0 contains the address we are going to jump to assuming no exception got installed + __ get_thread(thread); + __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // clear last_Java_sp + __ reset_last_Java_frame(true); + // check for pending exceptions + Label pending; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, pending); + __ delayed()->nop(); + // get the returned Method* + //FIXME, do mips need this ? + __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 + __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); + __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); + RegisterSaver::restore_live_registers(masm); + + // We are back the the original state on entry and ready to go the callee method. + __ jr(V0); + __ delayed()->nop(); + // Pending exception after the safepoint + + __ bind(pending); + + RegisterSaver::restore_live_registers(masm); + + // exception pending => remove activation and forward to exception handler + //forward_exception_entry need return address on the stack + __ push(RA); + __ get_thread(thread); + __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); + __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + // + // make sure all code is generated + masm->flush(); + + RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); + return tmp; +} + +extern "C" int SpinPause() {return 0;} + + +//------------------------------Montgomery multiplication------------------------ +// + +// Subtract 0:b from carry:a. Return carry. +static unsigned long +sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { + long borrow = 0, t = 0; + unsigned long tmp0, tmp1; + __asm__ __volatile__ ( + "0: \n" + "ld %[tmp0], 0(%[a]) \n" + "ld %[tmp1], 0(%[b]) \n" + "sltu %[t], %[tmp0], %[borrow] \n" + "dsubu %[tmp0], %[tmp0], %[borrow] \n" + "sltu %[borrow], %[tmp0], %[tmp1] \n" + "or %[borrow], %[borrow], %[t] \n" + "dsubu %[tmp0], %[tmp0], %[tmp1] \n" + "sd %[tmp0], 0(%[a]) \n" + "daddiu %[a], %[a], 8 \n" + "daddiu %[b], %[b], 8 \n" + "daddiu %[len], %[len], -1 \n" + "bgtz %[len], 0b \n" + "dsubu %[tmp0], %[carry], %[borrow] \n" + : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) + : [carry]"r"(carry) + : "memory" + ); + return tmp0; +} + +// Multiply (unsigned) Long A by Long B, accumulating the double- +// length result into the accumulator formed of t0, t1, and t2. 
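For readers less familiar with MIPS inline assembly, the accumulate described above can be written portably. The sketch below is an editor's illustration using GCC's unsigned __int128; the actual MACC/MACC2 that follow use dmultu/mfhi/mflo with explicit sltu carry propagation:

    #include <cstdint>

    // Portable equivalent of MACC: t2:t1:t0 += A * B, with t2:t1:t0 a 192-bit accumulator
    // held in three 64-bit words.
    static inline void macc_portable(uint64_t A, uint64_t B,
                                     uint64_t& t0, uint64_t& t1, uint64_t& t2) {
      unsigned __int128 prod = (unsigned __int128)A * B;              // full 128-bit product
      unsigned __int128 lo   = (unsigned __int128)t0 + (uint64_t)prod;
      t0 = (uint64_t)lo;                                              // low word
      unsigned __int128 mid  = (unsigned __int128)t1
                             + (uint64_t)(prod >> 64)                 // high half of the product
                             + (uint64_t)(lo >> 64);                  // carry out of the low word
      t1 = (uint64_t)mid;                                             // middle word
      t2 += (uint64_t)(mid >> 64);                                    // final carry into the top word
    }
    // MACC2 adds the product twice; portably that is simply two calls to macc_portable().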
+inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { + unsigned long hi, lo, carry = 0, t = 0; + __asm__ __volatile__( + "dmultu %[A], %[B] \n" + "mfhi %[hi] \n" + "mflo %[lo] \n" + "daddu %[t0], %[t0], %[lo] \n" + "sltu %[carry], %[t0], %[lo] \n" + "daddu %[t1], %[t1], %[carry] \n" + "sltu %[t], %[t1], %[carry] \n" + "daddu %[t1], %[t1], %[hi] \n" + "sltu %[carry], %[t1], %[hi] \n" + "or %[carry], %[carry], %[t] \n" + "daddu %[t2], %[t2], %[carry] \n" + : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) + : [A]"r"(A), [B]"r"(B) + : + ); +} + +// As above, but add twice the double-length result into the +// accumulator. +inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { + unsigned long hi, lo, carry = 0, t = 0; + __asm__ __volatile__( + "dmultu %[A], %[B] \n" + "mfhi %[hi] \n" + "mflo %[lo] \n" + "daddu %[t0], %[t0], %[lo] \n" + "sltu %[carry], %[t0], %[lo] \n" + "daddu %[t1], %[t1], %[carry] \n" + "sltu %[t], %[t1], %[carry] \n" + "daddu %[t1], %[t1], %[hi] \n" + "sltu %[carry], %[t1], %[hi] \n" + "or %[carry], %[carry], %[t] \n" + "daddu %[t2], %[t2], %[carry] \n" + "daddu %[t0], %[t0], %[lo] \n" + "sltu %[carry], %[t0], %[lo] \n" + "daddu %[t1], %[t1], %[carry] \n" + "sltu %[t], %[t1], %[carry] \n" + "daddu %[t1], %[t1], %[hi] \n" + "sltu %[carry], %[t1], %[hi] \n" + "or %[carry], %[carry], %[t] \n" + "daddu %[t2], %[t2], %[carry] \n" + : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) + : [A]"r"(A), [B]"r"(B) + : + ); +} + +// Fast Montgomery multiplication. The derivation of the algorithm is +// in A Cryptographic Library for the Motorola DSP56000, +// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. + +static void __attribute__((noinline)) +montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], + unsigned long m[], unsigned long inv, int len) { + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + int i; + + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + + for (i = 0; i < len; i++) { + int j; + for (j = 0; j < i; j++) { + MACC(a[j], b[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + MACC(a[i], b[0], t0, t1, t2); + m[i] = t0 * inv; + MACC(m[i], n[0], t0, t1, t2); + + assert(t0 == 0, "broken Montgomery multiply"); + + t0 = t1; t1 = t2; t2 = 0; + } + + for (i = len; i < 2*len; i++) { + int j; + for (j = i-len+1; j < len; j++) { + MACC(a[j], b[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i-len] = t0; + t0 = t1; t1 = t2; t2 = 0; + } + + while (t0) + t0 = sub(m, n, t0, len); +} + +// Fast Montgomery squaring. This uses asymptotically 25% fewer +// multiplies so it should be up to 25% faster than Montgomery +// multiplication. However, its loop control is more complex and it +// may actually run slower on some machines. 
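The savings claimed above come from the cross terms of a square pairing up, which is exactly the MACC2/single-MACC split used in the loop below. A small self-check of that identity (editor's illustration with arbitrary sample values, not part of the patch):

    #include <cassert>

    int main() {
      // For column i of a*a:  sum_{j=0..i} a[j]*a[i-j]
      //   == 2 * sum_{j < (i+1)/2} a[j]*a[i-j]  +  (a[i/2]*a[i/2] if i is even)
      // i.e. paired terms go through MACC2 and only the diagonal term needs a single MACC.
      unsigned long a[4] = {3, 5, 7, 11};
      for (int i = 0; i < 4; i++) {
        unsigned long full = 0, paired = 0;
        for (int j = 0; j <= i; j++)           full   += a[j] * a[i - j];
        for (int j = 0; j < (i + 1) / 2; j++)  paired += 2 * a[j] * a[i - j];
        if ((i & 1) == 0)                      paired += a[i / 2] * a[i / 2];
        assert(full == paired);
      }
      return 0;
    }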
+ +static void __attribute__((noinline)) +montgomery_square(unsigned long a[], unsigned long n[], + unsigned long m[], unsigned long inv, int len) { + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + int i; + + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + + for (i = 0; i < len; i++) { + int j; + int end = (i+1)/2; + for (j = 0; j < end; j++) { + MACC2(a[j], a[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + if ((i & 1) == 0) { + MACC(a[j], a[j], t0, t1, t2); + } + for (; j < i; j++) { + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i] = t0 * inv; + MACC(m[i], n[0], t0, t1, t2); + + assert(t0 == 0, "broken Montgomery square"); + + t0 = t1; t1 = t2; t2 = 0; + } + + for (i = len; i < 2*len; i++) { + int start = i-len+1; + int end = start + (len - start)/2; + int j; + for (j = start; j < end; j++) { + MACC2(a[j], a[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + if ((i & 1) == 0) { + MACC(a[j], a[j], t0, t1, t2); + } + for (; j < len; j++) { + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i-len] = t0; + t0 = t1; t1 = t2; t2 = 0; + } + + while (t0) + t0 = sub(m, n, t0, len); +} + +// Swap words in a longword. +static unsigned long swap(unsigned long x) { + return (x << 32) | (x >> 32); +} + +// Copy len longwords from s to d, word-swapping as we go. The +// destination array is reversed. +static void reverse_words(unsigned long *s, unsigned long *d, int len) { + d += len; + while(len-- > 0) { + d--; + *d = swap(*s); + s++; + } +} + +// The threshold at which squaring is advantageous was determined +// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. +// Doesn't seem to be relevant for MIPS64 so we use the same value. +#define MONTGOMERY_SQUARING_THRESHOLD 64 + +void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + assert(len % 2 == 0, "array length in montgomery_multiply must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 8k bytes of stack space. + int total_allocation = longwords * sizeof (unsigned long) * 4; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *b = scratch + 1 * longwords, + *n = scratch + 2 * longwords, + *m = scratch + 3 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)b_ints, b, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); + + reverse_words(m, (unsigned long *)m_ints, longwords); +} + +void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + assert(len % 2 == 0, "array length in montgomery_square must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 6k bytes of stack space. 
+ int total_allocation = longwords * sizeof (unsigned long) * 3; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *n = scratch + 1 * longwords, + *m = scratch + 2 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + if (len >= MONTGOMERY_SQUARING_THRESHOLD) { + ::montgomery_square(a, n, m, (unsigned long)inv, longwords); + } else { + ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); + } + + reverse_words(m, (unsigned long *)m_ints, longwords); +} diff --git a/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp b/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp new file mode 100644 index 00000000000..aeb797faf9f --- /dev/null +++ b/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp @@ -0,0 +1,2147 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_mips.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/top.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) +//#define a__ ((Assembler*)_masm)-> + +//#ifdef PRODUCT +//#define BLOCK_COMMENT(str) /* nothing */ +//#else +//#define BLOCK_COMMENT(str) __ block_comment(str) +//#endif + +//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions + +// Stub Code definitions + +static address handle_unsafe_access() { + JavaThread* thread = JavaThread::current(); + address pc = thread->saved_exception_pc(); + // pc is the instruction which we must emulate + // doing a no-op is fine: return garbage from the load + // therefore, compute npc + address npc = (address)((unsigned long)pc + sizeof(unsigned int)); + + // request an async exception + thread->set_pending_unsafe_access_error(); + + // return address of next instruction to execute + return npc; +} + +class StubGenerator: public StubCodeGenerator { + private: + + // ABI mips n64 + // This fig is not MIPS ABI. It is call Java from C ABI. + // Call stubs are used to call Java from C + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S1) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp + // 3 [ result ] <--- a1 + // 4 [ result_type ] <--- a2 + // 5 [ method ] <--- a3 + // 6 [ entry_point ] <--- a4 + // 7 [ parameters ] <--- a5 + // 8 [ parameter_size ] <--- a6 + // 9 [ thread ] <--- a7 + + // + // n64 does not save paras in sp. + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + //-13 [ thread ] + //-12 [ result_type ] <--- a2 + //-11 [ result ] <--- a1 + //-10 [ ] + // -9 [ ptr. to call wrapper ] <--- a0 + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S1) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ] <--- old sp + // + // Find a right place in the call_stub for GP. + // GP will point to the starting point of Interpreter::dispatch_table(itos). + // It should be saved/restored before/after Java calls. + // + enum call_stub_layout { + RA_off = 1, + FP_off = 0, + BCP_off = -1, + LVP_off = -2, + TSR_off = -3, + S1_off = -4, + S3_off = -5, + S4_off = -6, + S5_off = -7, + S6_off = -8, + call_wrapper_off = -9, + result_off = -11, + result_type_off = -12, + thread_off = -13, + total_off = thread_off - 1, + GP_off = -14, + }; + + address generate_call_stub(address& return_address) { + + assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + + // same as in generate_catch_exception()! 
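The a0..a7 assignments in the layout comment above mirror the generic entry signature through which the VM reaches this stub; for reference, it has roughly the following shape in stubRoutines.hpp (the per-register annotations are the editor's, taken from the comment above, not part of the patch):

    // Calls to Java go through a function pointer of this type:
    typedef void (*CallStub)(address   link,               // A0: ptr. to call wrapper
                             intptr_t* result,             // A1
                             BasicType result_type,        // A2
                             Method*   method,             // A3
                             address   entry_point,        // A4
                             intptr_t* parameters,         // A5
                             int       size_of_parameters, // A6
                             TRAPS);                       // A7: thread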
+ + // stub code + // save ra and fp + __ enter(); + // I think 14 is the max gap between argument and callee saved register + __ daddiu(SP, SP, total_off * wordSize); + __ sd(BCP, FP, BCP_off * wordSize); + __ sd(LVP, FP, LVP_off * wordSize); + __ sd(TSR, FP, TSR_off * wordSize); + __ sd(S1, FP, S1_off * wordSize); + __ sd(S3, FP, S3_off * wordSize); + __ sd(S4, FP, S4_off * wordSize); + __ sd(S5, FP, S5_off * wordSize); + __ sd(S6, FP, S6_off * wordSize); + __ sd(A0, FP, call_wrapper_off * wordSize); + __ sd(A1, FP, result_off * wordSize); + __ sd(A2, FP, result_type_off * wordSize); + __ sd(A7, FP, thread_off * wordSize); + __ sd(GP, FP, GP_off * wordSize); + + __ set64(GP, (long)Interpreter::dispatch_table(itos)); + +#ifdef OPT_THREAD + __ move(TREG, A7); +#endif + //add for compressedoops + __ reinit_heapbase(); + +#ifdef ASSERT + // make sure we have no pending exceptions + { + Label L; + __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ bind(L); + } +#endif + + // pass parameters if any + // A5: parameter + // A6: parameter_size + // T0: parameter_size_tmp(--) + // T2: offset(++) + // T3: tmp + Label parameters_done; + // judge if the parameter_size equals 0 + __ beq(A6, R0, parameters_done); + __ delayed()->nop(); + __ dsll(AT, A6, Interpreter::logStackElementSize); + __ dsubu(SP, SP, AT); + __ move(AT, -StackAlignmentInBytes); + __ andr(SP, SP , AT); + // Copy Java parameters in reverse order (receiver last) + // Note that the argument order is inverted in the process + Label loop; + __ move(T0, A6); + __ move(T2, R0); + __ bind(loop); + + // get parameter + __ dsll(T3, T0, LogBytesPerWord); + __ daddu(T3, T3, A5); + __ ld(AT, T3, -wordSize); + __ dsll(T3, T2, LogBytesPerWord); + __ daddu(T3, T3, SP); + __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); + __ daddiu(T2, T2, 1); + __ daddiu(T0, T0, -1); + __ bne(T0, R0, loop); + __ delayed()->nop(); + // advance to next parameter + + // call Java function + __ bind(parameters_done); + + // receiver in V0, methodOop in Rmethod + + __ move(Rmethod, A3); + __ move(Rsender, SP); //set sender sp + __ jalr(A4); + __ delayed()->nop(); + return_address = __ pc(); + + Label common_return; + __ bind(common_return); + + // store result depending on type + // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + __ ld(T0, FP, result_off * wordSize); // result --> T0 + Label is_long, is_float, is_double, exit; + __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 + __ daddiu(T3, T2, (-1) * T_LONG); + __ beq(T3, R0, is_long); + __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); + __ beq(T3, R0, is_float); + __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); + __ beq(T3, R0, is_double); + __ delayed()->nop(); + + // handle T_INT case + __ sd(V0, T0, 0 * wordSize); + __ bind(exit); + + // restore + __ ld(BCP, FP, BCP_off * wordSize); + __ ld(LVP, FP, LVP_off * wordSize); + __ ld(GP, FP, GP_off * wordSize); + __ ld(TSR, FP, TSR_off * wordSize); + + __ ld(S1, FP, S1_off * wordSize); + __ ld(S3, FP, S3_off * wordSize); + __ ld(S4, FP, S4_off * wordSize); + __ ld(S5, FP, S5_off * wordSize); + __ ld(S6, FP, S6_off * wordSize); + + __ leave(); + + // return + __ jr(RA); + __ delayed()->nop(); + + // handle return types different from T_INT + __ bind(is_long); + __ sd(V0, T0, 0 * wordSize); + __ b(exit); + __ 
delayed()->nop(); + + __ bind(is_float); + __ swc1(F0, T0, 0 * wordSize); + __ b(exit); + __ delayed()->nop(); + + __ bind(is_double); + __ sdc1(F0, T0, 0 * wordSize); + __ b(exit); + __ delayed()->nop(); + //FIXME, 1.6 mips version add operation of fpu here + StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); + __ b(common_return); + __ delayed()->nop(); + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + // Note: Usually the parameters are removed by the callee. In case + // of an exception crossing an activation frame boundary, that is + // not the case if the callee is compiled code => need to setup the + // sp. + // + // V0: exception oop + + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + + Register thread = TREG; + + // get thread directly +#ifndef OPT_THREAD + __ ld(thread, FP, thread_off * wordSize); +#endif + +#ifdef ASSERT + // verify that threads correspond + { Label L; + __ get_thread(T8); + __ beq(T8, thread, L); + __ delayed()->nop(); + __ stop("StubRoutines::catch_exception: threads must correspond"); + __ bind(L); + } +#endif + // set pending exception + __ verify_oop(V0); + __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ li(AT, (long)__FILE__); + __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); + __ li(AT, (long)__LINE__); + __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); + __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); + __ delayed()->nop(); + + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + // Contract with Java-level exception handlers: + // V0: exception + // V1: throwing pc + // + // NOTE: At entry of this stub, exception-pc must be on stack !! + + address generate_forward_exception() { + StubCodeMark mark(this, "StubRoutines", "forward exception"); + //Register thread = TREG; + Register thread = TREG; + address start = __ pc(); + + // Upon entry, the sp points to the return address returning into + // Java (interpreted or compiled) code; i.e., the return address + // throwing pc. + // + // Arguments pushed before the runtime call are still on the stack + // but the exception handler will reset the stack pointer -> + // ignore them. A potential result in registers can be ignored as + // well. 
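In outline, the code generated below looks up the handler for the throwing pc, moves the pending exception out of thread-local storage, and jumps to the handler with V0/V1 set as per the contract above. A readability-only sketch with hypothetical stand-in types (editor's illustration; the real stub does all of this at register level):

    struct ThreadStub {                                     // stand-in for JavaThread
      void* pending_exception;
      void* take_pending() { void* e = pending_exception; pending_exception = nullptr; return e; }
    };
    // Stand-in for SharedRuntime::exception_handler_for_return_address.
    typedef void* (*HandlerLookup)(ThreadStub* thread, void* return_pc);

    // Conceptual flow of generate_forward_exception():
    inline void forward_exception_sketch(ThreadStub* thread, void** sp, HandlerLookup lookup,
                                         void*& V0, void*& V1, void*& T9) {
      void* ret_pc = *sp;              // throwing pc, pushed by the caller of the stub
      T9 = lookup(thread, ret_pc);     // compute the exception handler for that pc
      V1 = ret_pc;                     // contract: V1 = throwing pc (popped off the stack)
      V0 = thread->take_pending();     // contract: V0 = exception oop; TLS slot cleared
      // ...the generated code then jumps to T9 with V0/V1 set.
    }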
+ +#ifndef OPT_THREAD + __ get_thread(thread); +#endif +#ifdef ASSERT + // make sure this code is only executed if there is a pending exception + { + Label L; + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into T9 + __ ld(A1, SP, 0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T9, V0); + __ pop(V1); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); + +#ifdef ASSERT + // make sure exception is set + { + Label L; + __ bne(V0, R0, L); + __ delayed()->nop(); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + + // continue at exception handler (return address removed) + // V0: exception + // T9: exception handler + // V1: throwing pc + __ verify_oop(V0); + __ jr(T9); + __ delayed()->nop(); + + return start; + } + + // The following routine generates a subroutine to throw an + // asynchronous UnknownError when an unsafe access gets a fault that + // could not be reasonably prevented by the programmer. (Example: + // SIGBUS/OBJERR.) + address generate_handler_for_unsafe_access() { + StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); + address start = __ pc(); + __ push(V0); + __ pushad_except_v0(); // push registers + __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ popad_except_v0(); + __ move(RA, V0); + __ pop(V0); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Non-destructive plausibility checks for oops + // + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); + __ reinit_heapbase(); + __ verify_oop_subroutine(); + address end = __ pc(); + return start; + } + + // + // Generate overlap test for array copy stubs + // + // Input: + // A0 - array1 + // A1 - array2 + // A2 - element count + // + + // use T9 as temp + void array_overlap_test(address no_overlap_target, int log2_elem_size) { + int elem_size = 1 << log2_elem_size; + Address::ScaleFactor sf = Address::times_1; + + switch (log2_elem_size) { + case 0: sf = Address::times_1; break; + case 1: sf = Address::times_2; break; + case 2: sf = Address::times_4; break; + case 3: sf = Address::times_8; break; + } + + __ dsll(AT, A2, sf); + __ daddu(AT, AT, A0); + __ daddiu(T9, AT, -elem_size); + __ dsubu(AT, A1, A0); + __ blez(AT, no_overlap_target); + __ delayed()->nop(); + __ dsubu(AT, A1, T9); + __ bgtz(AT, no_overlap_target); + __ delayed()->nop(); + + // If A0 = 0xf... 
and A1 = 0x0..., than goto no_overlap_target + Label L; + __ bgez(A0, L); + __ delayed()->nop(); + __ bgtz(A1, no_overlap_target); + __ delayed()->nop(); + __ bind(L); + + } + + // + // Generate store check for array + // + // Input: + // T0 - starting address + // T1 - element count + // + // The 2 input registers are overwritten + // + + + void array_store_check(Register tmp) { + assert_different_registers(tmp, AT, T0, T1); + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + Label l_0; + + if (UseConcMarkSweepGC) __ sync(); + + __ set64(tmp, (long)ct->byte_map_base); + + __ dsll(AT, T1, TIMES_OOP); + __ daddu(AT, T0, AT); + __ daddiu(T1, AT, - BytesPerHeapOop); + + __ shr(T0, CardTableModRefBS::card_shift); + __ shr(T1, CardTableModRefBS::card_shift); + + __ dsubu(T1, T1, T0); // end --> cards count + __ bind(l_0); + + __ daddu(AT, tmp, T0); + if (UseLEXT1) { + __ gssbx(R0, AT, T1, 0); + } else { + __ daddu(AT, AT, T1); + __ sb(R0, AT, 0); + } + + __ bgtz(T1, l_0); + __ delayed()->daddiu(T1, T1, - 1); + } + + // Generate code for an array write pre barrier + // + // addr - starting address + // count - element count + // tmp - scratch register + // + // Destroy no registers! + // + void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { + BarrierSet* bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + // With G1, don't generate the call if we statically know that the target in uninitialized + if (!dest_uninitialized) { + __ pushad(); // push registers + if (count == A0) { + if (addr == A1) { + // exactly backwards!! + //__ xchgptr(c_rarg1, c_rarg0); + __ move(AT, A0); + __ move(A0, A1); + __ move(A1, AT); + } else { + __ move(A1, count); + __ move(A0, addr); + } + } else { + __ move(A0, addr); + __ move(A1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2); + __ popad(); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + case BarrierSet::ModRef: + break; + default: + ShouldNotReachHere(); + + } + } + + // + // Generate code for an array write post barrier + // + // Input: + // start - register containing starting address of destination array + // count - elements count + // scratch - scratch register + // + // The input registers are overwritten. + // + void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) { + assert_different_registers(start, count, scratch, AT); + BarrierSet* bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + __ pushad(); // push registers (overkill) + if (count == A0) { + if (start == A1) { + // exactly backwards!! 
+ //__ xchgptr(c_rarg1, c_rarg0); + __ move(AT, A0); + __ move(A0, A1); + __ move(A1, AT); + } else { + __ move(A1, count); + __ move(A0, start); + } + } else { + __ move(A0, start); + __ move(A1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2); + __ popad(); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label L_loop; + const Register end = count; + + if (UseConcMarkSweepGC) __ sync(); + + int64_t disp = (int64_t) ct->byte_map_base; + __ set64(scratch, disp); + + __ lea(end, Address(start, count, TIMES_OOP, 0)); // end == start+count*oop_size + __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive + __ shr(start, CardTableModRefBS::card_shift); + __ shr(end, CardTableModRefBS::card_shift); + __ dsubu(end, end, start); // end --> cards count + + __ daddu(start, start, scratch); + + __ bind(L_loop); + if (UseLEXT1) { + __ gssbx(R0, start, count, 0); + } else { + __ daddu(AT, start, count); + __ sb(R0, AT, 0); + } + __ daddiu(count, count, -1); + __ slt(AT, count, R0); + __ beq(AT, R0, L_loop); + __ delayed()->nop(); + } + break; + default: + ShouldNotReachHere(); + } + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_byte_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_byte_copy(). + // + address generate_disjoint_byte_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + + Register tmp1 = T0; + Register tmp2 = T1; + Register tmp3 = T3; + + address start = __ pc(); + + __ push(tmp1); + __ push(tmp2); + __ push(tmp3); + __ move(tmp1, A0); + __ move(tmp2, A1); + __ move(tmp3, A2); + + + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; + Label l_debug; + + __ daddiu(AT, tmp3, -9); //why the number is 9 ? + __ blez(AT, l_9); + __ delayed()->nop(); + + if (!aligned) { + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 1); + __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy + __ delayed()->nop(); + + __ andi(AT, tmp1, 1); + __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes + __ delayed()->nop(); + + __ lb(AT, tmp1, 0); + __ daddiu(tmp1, tmp1, 1); + __ sb(AT, tmp2, 0); + __ daddiu(tmp2, tmp2, 1); + __ daddiu(tmp3, tmp3, -1); + __ bind(l_10); + + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 3); + __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy + __ delayed()->nop(); + + // At this point it is guaranteed that both, from and to have the same alignment mod 4. + + // Copy 2 elements if necessary to align to 4 bytes. 
+ __ andi(AT, tmp1, 3); + __ beq(AT, R0, l_2); + __ delayed()->nop(); + + __ lhu(AT, tmp1, 0); + __ daddiu(tmp1, tmp1, 2); + __ sh(AT, tmp2, 0); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(tmp3, tmp3, -2); + __ bind(l_2); + + // At this point the positions of both, from and to, are at least 4 byte aligned. + + // Copy 4 elements at a time. + // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 7); + __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned + __ delayed()->nop(); + + // Copy a 4 elements if necessary to align to 8 bytes. + __ andi(AT, tmp1, 7); + __ beq(AT, R0, l_7); + __ delayed()->nop(); + + __ lw(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -4); + __ sw(AT, tmp2, 0); + { // FasterArrayCopy + __ daddiu(tmp1, tmp1, 4); + __ daddiu(tmp2, tmp2, 4); + } + } + + __ bind(l_7); + + // Copy 4 elements at a time; either the loads or the stores can + // be unaligned if aligned == false. + + { // FasterArrayCopy + __ daddiu(AT, tmp3, -7); + __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain + __ delayed()->nop(); + + __ bind(l_8); + // For Loongson, there is 128-bit memory access. TODO + __ ld(AT, tmp1, 0); + __ sd(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 8); + __ daddiu(tmp2, tmp2, 8); + __ daddiu(tmp3, tmp3, -8); + __ daddiu(AT, tmp3, -8); + __ bgez(AT, l_8); + __ delayed()->nop(); + } + __ bind(l_6); + + // copy 4 bytes at a time + { // FasterArrayCopy + __ daddiu(AT, tmp3, -3); + __ blez(AT, l_1); + __ delayed()->nop(); + + __ bind(l_3); + __ lw(AT, tmp1, 0); + __ sw(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 4); + __ daddiu(tmp2, tmp2, 4); + __ daddiu(tmp3, tmp3, -4); + __ daddiu(AT, tmp3, -4); + __ bgez(AT, l_3); + __ delayed()->nop(); + + } + + // do 2 bytes copy + __ bind(l_1); + { + __ daddiu(AT, tmp3, -1); + __ blez(AT, l_9); + __ delayed()->nop(); + + __ bind(l_5); + __ lhu(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -2); + __ sh(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 2); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(AT, tmp3, -2); + __ bgez(AT, l_5); + __ delayed()->nop(); + } + + //do 1 element copy--byte + __ bind(l_9); + __ beq(R0, tmp3, l_4); + __ delayed()->nop(); + + { + __ bind(l_11); + __ lb(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -1); + __ sb(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 1); + __ daddiu(tmp2, tmp2, 1); + __ daddiu(AT, tmp3, -1); + __ bgez(AT, l_11); + __ delayed()->nop(); + } + + __ bind(l_4); + __ pop(tmp3); + __ pop(tmp2); + __ pop(tmp1); + + __ jr(RA); + __ delayed()->nop(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_byte_copy(bool aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; + Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; + + address nooverlap_target = aligned ? 
+ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : + StubRoutines::jbyte_disjoint_arraycopy(); + + array_overlap_test(nooverlap_target, 0); + + const Register from = A0; // source array address + const Register to = A1; // destination array address + const Register count = A2; // elements count + const Register end_from = T3; // source array end address + const Register end_to = T0; // destination array end address + const Register end_count = T1; // destination array end address + + __ push(end_from); + __ push(end_to); + __ push(end_count); + __ push(T8); + + // copy from high to low + __ move(end_count, count); + __ daddu(end_from, from, end_count); + __ daddu(end_to, to, end_count); + + // If end_from and end_to has differante alignment, unaligned copy is performed. + __ andi(AT, end_from, 3); + __ andi(T8, end_to, 3); + __ bne(AT, T8, l_copy_byte); + __ delayed()->nop(); + + // First deal with the unaligned data at the top. + __ bind(l_unaligned); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + + __ andi(AT, end_from, 3); + __ bne(AT, R0, l_from_unaligned); + __ delayed()->nop(); + + __ andi(AT, end_to, 3); + __ beq(AT, R0, l_4_bytes_aligned); + __ delayed()->nop(); + + __ bind(l_from_unaligned); + __ lb(AT, end_from, -1); + __ sb(AT, end_to, -1); + __ daddiu(end_from, end_from, -1); + __ daddiu(end_to, end_to, -1); + __ daddiu(end_count, end_count, -1); + __ b(l_unaligned); + __ delayed()->nop(); + + // now end_to, end_from point to 4-byte aligned high-ends + // end_count contains byte count that is not copied. + // copy 4 bytes at a time + __ bind(l_4_bytes_aligned); + + __ move(T8, end_count); + __ daddiu(AT, end_count, -3); + __ blez(AT, l_copy_suffix); + __ delayed()->nop(); + + //__ andi(T8, T8, 3); + __ lea(end_from, Address(end_from, -4)); + __ lea(end_to, Address(end_to, -4)); + + __ dsrl(end_count, end_count, 2); + __ align(16); + __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes + __ lw(AT, end_from, 0); + __ sw(AT, end_to, 0); + __ addiu(end_from, end_from, -4); + __ addiu(end_to, end_to, -4); + __ addiu(end_count, end_count, -1); + __ bne(end_count, R0, l_copy_4_bytes_loop); + __ delayed()->nop(); + + __ b(l_copy_suffix); + __ delayed()->nop(); + // copy dwords aligned or not with repeat move + // l_copy_suffix + // copy suffix (0-3 bytes) + __ bind(l_copy_suffix); + __ andi(T8, T8, 3); + __ beq(T8, R0, l_exit); + __ delayed()->nop(); + __ addiu(end_from, end_from, 3); + __ addiu(end_to, end_to, 3); + __ bind(l_copy_suffix_loop); + __ lb(AT, end_from, 0); + __ sb(AT, end_to, 0); + __ addiu(end_from, end_from, -1); + __ addiu(end_to, end_to, -1); + __ addiu(T8, T8, -1); + __ bne(T8, R0, l_copy_suffix_loop); + __ delayed()->nop(); + + __ bind(l_copy_byte); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + __ lb(AT, end_from, -1); + __ sb(AT, end_to, -1); + __ daddiu(end_from, end_from, -1); + __ daddiu(end_to, end_to, -1); + __ daddiu(end_count, end_count, -1); + __ b(l_copy_byte); + __ delayed()->nop(); + + __ bind(l_exit); + __ pop(T8); + __ pop(end_count); + __ pop(end_to); + __ pop(end_from); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Generate stub for disjoint short copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: A0 + // to: A1 + // elm.count: A2 treated as signed + // one element: 2 bytes + // + // Strategy for aligned==true: + // + // If length <= 9: + // 1. copy 1 elements at a time (l_5) + // + // If length > 9: + // 1. 
copy 4 elements at a time until less than 4 elements are left (l_7) + // 2. copy 2 elements at a time until less than 2 elements are left (l_6) + // 3. copy last element if one was left in step 2. (l_1) + // + // + // Strategy for aligned==false: + // + // If length <= 9: same as aligned==true case + // + // If length > 9: + // 1. continue with step 7. if the alignment of from and to mod 4 + // is different. + // 2. align from and to to 4 bytes by copying 1 element if necessary + // 3. at l_2 from and to are 4 byte aligned; continue with + // 6. if they cannot be aligned to 8 bytes because they have + // got different alignment mod 8. + // 4. at this point we know that both, from and to, have the same + // alignment mod 8, now copy one element if necessary to get + // 8 byte alignment of from and to. + // 5. copy 4 elements at a time until less than 4 elements are + // left; depending on step 3. all load/stores are aligned. + // 6. copy 2 elements at a time until less than 2 elements are + // left. (l_6) + // 7. copy 1 element at a time. (l_5) + // 8. copy last element if one was left in step 6. (l_1) + + address generate_disjoint_short_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Register tmp1 = T0; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T8; + Register tmp5 = T9; + Register tmp6 = T2; + + address start = __ pc(); + + __ push(tmp1); + __ push(tmp2); + __ push(tmp3); + __ move(tmp1, A0); + __ move(tmp2, A1); + __ move(tmp3, A2); + + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; + Label l_debug; + // don't try anything fancy if arrays don't have many elements + __ daddiu(AT, tmp3, -23); + __ blez(AT, l_14); + __ delayed()->nop(); + // move push here + __ push(tmp4); + __ push(tmp5); + __ push(tmp6); + + if (!aligned) { + __ xorr(AT, A0, A1); + __ andi(AT, AT, 1); + __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? + __ delayed()->nop(); + + __ xorr(AT, A0, A1); + __ andi(AT, AT, 3); + __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy + __ delayed()->nop(); + + // At this point it is guaranteed that both, from and to have the same alignment mod 4. + + // Copy 1 element if necessary to align to 4 bytes. + __ andi(AT, A0, 3); + __ beq(AT, R0, l_2); + __ delayed()->nop(); + + __ lhu(AT, tmp1, 0); + __ daddiu(tmp1, tmp1, 2); + __ sh(AT, tmp2, 0); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(tmp3, tmp3, -1); + __ bind(l_2); + + // At this point the positions of both, from and to, are at least 4 byte aligned. + + // Copy 4 elements at a time. + // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 7); + __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned + __ delayed()->nop(); + + // Copy a 2-element word if necessary to align to 8 bytes. + __ andi(AT, tmp1, 7); + __ beq(AT, R0, l_7); + __ delayed()->nop(); + + __ lw(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -2); + __ sw(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 4); + __ daddiu(tmp2, tmp2, 4); + }// end of if (!aligned) + + __ bind(l_7); + // At this time the position of both, from and to, are at least 8 byte aligned. + // Copy 8 elemnets at a time. + // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
+    __ xorr(AT, tmp1, tmp2);
+    __ andi(AT, AT, 15);
+    __ bne(AT, R0, l_9);
+    __ delayed()->nop();
+
+    // Copy a 4-element word if necessary to align to 16 bytes.
+    __ andi(AT, tmp1, 15);
+    __ beq(AT, R0, l_10);
+    __ delayed()->nop();
+
+    __ ld(AT, tmp1, 0);
+    __ daddiu(tmp3, tmp3, -4);
+    __ sd(AT, tmp2, 0);
+    __ daddiu(tmp1, tmp1, 8);
+    __ daddiu(tmp2, tmp2, 8);
+
+    __ bind(l_10);
+
+    // Copy 8 elements at a time; either the loads or the stores can
+    // be unaligned if aligned == false.
+
+    { // FasterArrayCopy
+      __ bind(l_11);
+      // For Loongson, the 128-bit memory access instructions are gslq/gssq
+      if (UseLEXT1) {
+        __ gslq(AT, tmp4, tmp1, 0);
+        __ gslq(tmp5, tmp6, tmp1, 16);
+        __ daddiu(tmp1, tmp1, 32);
+        __ daddiu(tmp2, tmp2, 32);
+        __ gssq(AT, tmp4, tmp2, -32);
+        __ gssq(tmp5, tmp6, tmp2, -16);
+      } else {
+        __ ld(AT, tmp1, 0);
+        __ ld(tmp4, tmp1, 8);
+        __ ld(tmp5, tmp1, 16);
+        __ ld(tmp6, tmp1, 24);
+        __ daddiu(tmp1, tmp1, 32);
+        __ sd(AT, tmp2, 0);
+        __ sd(tmp4, tmp2, 8);
+        __ sd(tmp5, tmp2, 16);
+        __ sd(tmp6, tmp2, 24);
+        __ daddiu(tmp2, tmp2, 32);
+      }
+      __ daddiu(tmp3, tmp3, -16);
+      __ daddiu(AT, tmp3, -16);
+      __ bgez(AT, l_11);
+      __ delayed()->nop();
+    }
+    __ bind(l_9);
+
+    // Copy 4 elements at a time; either the loads or the stores can
+    // be unaligned if aligned == false.
+    { // FasterArrayCopy
+      __ daddiu(AT, tmp3, -15); // the loop is unrolled 4 times, so at least 16 elements must remain
+      __ blez(AT, l_4); // go to the element-wise tail copy if fewer than 16 elements remain
+      __ delayed()->nop();
+
+      __ bind(l_8);
+      __ ld(AT, tmp1, 0);
+      __ ld(tmp4, tmp1, 8);
+      __ ld(tmp5, tmp1, 16);
+      __ ld(tmp6, tmp1, 24);
+      __ sd(AT, tmp2, 0);
+      __ sd(tmp4, tmp2, 8);
+      __ sd(tmp5, tmp2, 16);
+      __ daddiu(tmp1, tmp1, 32);
+      __ daddiu(tmp2, tmp2, 32);
+      __ daddiu(tmp3, tmp3, -16);
+      __ daddiu(AT, tmp3, -16);
+      __ bgez(AT, l_8);
+      __ delayed()->sd(tmp6, tmp2, -8);
+    }
+    __ bind(l_6);
+
+    // copy 2 elements at a time
+    { // FasterArrayCopy
+      __ daddiu(AT, tmp3, -7);
+      __ blez(AT, l_4);
+      __ delayed()->nop();
+
+      __ bind(l_3);
+      __ lw(AT, tmp1, 0);
+      __ lw(tmp4, tmp1, 4);
+      __ lw(tmp5, tmp1, 8);
+      __ lw(tmp6, tmp1, 12);
+      __ sw(AT, tmp2, 0);
+      __ sw(tmp4, tmp2, 4);
+      __ sw(tmp5, tmp2, 8);
+      __ daddiu(tmp1, tmp1, 16);
+      __ daddiu(tmp2, tmp2, 16);
+      __ daddiu(tmp3, tmp3, -8);
+      __ daddiu(AT, tmp3, -8);
+      __ bgez(AT, l_3);
+      __ delayed()->sw(tmp6, tmp2, -4);
+    }
+
+    __ bind(l_1);
+    // do single element copy (16 bit), can this happen?
+ { // FasterArrayCopy + __ daddiu(AT, tmp3, -3); + __ blez(AT, l_4); + __ delayed()->nop(); + + __ bind(l_5); + __ lhu(AT, tmp1, 0); + __ lhu(tmp4, tmp1, 2); + __ lhu(tmp5, tmp1, 4); + __ lhu(tmp6, tmp1, 6); + __ sh(AT, tmp2, 0); + __ sh(tmp4, tmp2, 2); + __ sh(tmp5, tmp2, 4); + __ daddiu(tmp1, tmp1, 8); + __ daddiu(tmp2, tmp2, 8); + __ daddiu(tmp3, tmp3, -4); + __ daddiu(AT, tmp3, -4); + __ bgez(AT, l_5); + __ delayed()->sh(tmp6, tmp2, -2); + } + // single element + __ bind(l_4); + + __ pop(tmp6); + __ pop(tmp5); + __ pop(tmp4); + + __ bind(l_14); + { // FasterArrayCopy + __ beq(R0, tmp3, l_13); + __ delayed()->nop(); + + __ bind(l_12); + __ lhu(AT, tmp1, 0); + __ sh(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 2); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(tmp3, tmp3, -1); + __ daddiu(AT, tmp3, -1); + __ bgez(AT, l_12); + __ delayed()->nop(); + } + + __ bind(l_13); + __ pop(tmp3); + __ pop(tmp2); + __ pop(tmp1); + + __ jr(RA); + __ delayed()->nop(); + + __ bind(l_debug); + __ stop("generate_disjoint_short_copy should not reach here"); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we + // let the hardware handle it. The two or four words within dwords + // or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_short_copy(bool aligned, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; + + address nooverlap_target = aligned ? + StubRoutines::arrayof_jshort_disjoint_arraycopy() : + StubRoutines::jshort_disjoint_arraycopy(); + + array_overlap_test(nooverlap_target, 1); + + const Register from = A0; // source array address + const Register to = A1; // destination array address + const Register count = A2; // elements count + const Register end_from = T3; // source array end address + const Register end_to = T0; // destination array end address + const Register end_count = T1; // destination array end address + + __ push(end_from); + __ push(end_to); + __ push(end_count); + __ push(T8); + + // copy from high to low + __ move(end_count, count); + __ sll(AT, end_count, Address::times_2); + __ daddu(end_from, from, AT); + __ daddu(end_to, to, AT); + + // If end_from and end_to has differante alignment, unaligned copy is performed. + __ andi(AT, end_from, 3); + __ andi(T8, end_to, 3); + __ bne(AT, T8, l_copy_short); + __ delayed()->nop(); + + // First deal with the unaligned data at the top. + __ bind(l_unaligned); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + + __ andi(AT, end_from, 3); + __ bne(AT, R0, l_from_unaligned); + __ delayed()->nop(); + + __ andi(AT, end_to, 3); + __ beq(AT, R0, l_4_bytes_aligned); + __ delayed()->nop(); + + // Copy 1 element if necessary to align to 4 bytes. + __ bind(l_from_unaligned); + __ lhu(AT, end_from, -2); + __ sh(AT, end_to, -2); + __ daddiu(end_from, end_from, -2); + __ daddiu(end_to, end_to, -2); + __ daddiu(end_count, end_count, -1); + __ b(l_unaligned); + __ delayed()->nop(); + + // now end_to, end_from point to 4-byte aligned high-ends + // end_count contains byte count that is not copied. 
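For orientation: the conjoint copy above walks from the high addresses downward so that a destination overlapping the source at a higher address is not clobbered before it is read. A minimal C++ sketch of the same idea, illustrative only (the helper name is made up and this is not part of the generated stub):

  #include <stddef.h>
  #include <stdint.h>

  // Backward element-wise copy: safe when 'to' overlaps 'from' at a higher
  // address, which is exactly the case the conjoint stubs have to handle.
  static void conjoint_jshort_copy_sketch(const uint16_t* from, uint16_t* to, size_t count) {
    const uint16_t* end_from = from + count;  // one past the last source element
    uint16_t*       end_to   = to   + count;  // one past the last destination element
    while (count-- > 0) {
      *--end_to = *--end_from;                // copy high-to-low, like the stub's end_from/end_to walk
    }
  }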
+ // copy 4 bytes at a time + __ bind(l_4_bytes_aligned); + + __ daddiu(AT, end_count, -1); + __ blez(AT, l_copy_short); + __ delayed()->nop(); + + __ lw(AT, end_from, -4); + __ sw(AT, end_to, -4); + __ addiu(end_from, end_from, -4); + __ addiu(end_to, end_to, -4); + __ addiu(end_count, end_count, -2); + __ b(l_4_bytes_aligned); + __ delayed()->nop(); + + // copy 1 element at a time + __ bind(l_copy_short); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + __ lhu(AT, end_from, -2); + __ sh(AT, end_to, -2); + __ daddiu(end_from, end_from, -2); + __ daddiu(end_to, end_to, -2); + __ daddiu(end_count, end_count, -1); + __ b(l_copy_short); + __ delayed()->nop(); + + __ bind(l_exit); + __ pop(T8); + __ pop(end_count); + __ pop(end_to); + __ pop(end_from); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_3, l_4, l_5, l_6, l_7; + StubCodeMark mark(this, "StubRoutines", name); + + __ align(CodeEntryAlignment); + address start = __ pc(); + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + if (is_oop) { + gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); + } + + if(!aligned) { + __ xorr(AT, T3, T0); + __ andi(AT, AT, 7); + __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time + __ delayed()->nop(); + + __ andi(AT, T3, 7); + __ beq(AT, R0, l_6); //copy 2 elements each time + __ delayed()->nop(); + + __ lw(AT, T3, 0); + __ daddiu(T1, T1, -1); + __ sw(AT, T0, 0); + __ daddiu(T3, T3, 4); + __ daddiu(T0, T0, 4); + } + + { + __ bind(l_6); + __ daddiu(AT, T1, -1); + __ blez(AT, l_5); + __ delayed()->nop(); + + __ bind(l_7); + __ ld(AT, T3, 0); + __ sd(AT, T0, 0); + __ daddiu(T3, T3, 8); + __ daddiu(T0, T0, 8); + __ daddiu(T1, T1, -2); + __ daddiu(AT, T1, -2); + __ bgez(AT, l_7); + __ delayed()->nop(); + } + + __ bind(l_5); + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_3); + __ lw(AT, T3, 0); + __ sw(AT, T0, 0); + __ addiu(T3, T3, 4); + __ addiu(T0, T0, 4); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_3); + __ delayed()->nop(); + + // exit + __ bind(l_4); + if (is_oop) { + gen_write_ref_array_post_barrier(A1, A2, T1); + } + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' 
and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_2, l_4; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + address nooverlap_target; + + if (is_oop) { + nooverlap_target = aligned ? + StubRoutines::arrayof_oop_disjoint_arraycopy() : + StubRoutines::oop_disjoint_arraycopy(); + } else { + nooverlap_target = aligned ? + StubRoutines::arrayof_jint_disjoint_arraycopy() : + StubRoutines::jint_disjoint_arraycopy(); + } + + array_overlap_test(nooverlap_target, 2); + + if (is_oop) { + gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); + } + + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + // T3: source array address + // T0: destination array address + // T1: element count + + __ sll(AT, T1, Address::times_4); + __ addu(AT, T3, AT); + __ daddiu(T3, AT, -4); + __ sll(AT, T1, Address::times_4); + __ addu(AT, T0, AT); + __ daddiu(T0, AT, -4); + + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_2); + __ lw(AT, T3, 0); + __ sw(AT, T0, 0); + __ addiu(T3, T3, -4); + __ addiu(T0, T0, -4); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_2); + __ delayed()->nop(); + + __ bind(l_4); + if (is_oop) { + gen_write_ref_array_post_barrier(A1, A2, T1); + } + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). 
+ // + address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_3, l_4; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + if (is_oop) { + gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); + } + + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + // T3: source array address + // T0: destination array address + // T1: element count + + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_3); + __ ld(AT, T3, 0); + __ sd(AT, T0, 0); + __ addiu(T3, T3, 8); + __ addiu(T0, T0, 8); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_3); + __ delayed()->nop(); + + // exit + __ bind(l_4); + if (is_oop) { + gen_write_ref_array_post_barrier(A1, A2, T1); + } + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_2, l_4; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + address nooverlap_target; + + if (is_oop) { + nooverlap_target = aligned ? + StubRoutines::arrayof_oop_disjoint_arraycopy() : + StubRoutines::oop_disjoint_arraycopy(); + } else { + nooverlap_target = aligned ? 
+ StubRoutines::arrayof_jlong_disjoint_arraycopy() : + StubRoutines::jlong_disjoint_arraycopy(); + } + + array_overlap_test(nooverlap_target, 3); + + if (is_oop) { + gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); + } + + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + __ sll(AT, T1, Address::times_8); + __ addu(AT, T3, AT); + __ daddiu(T3, AT, -8); + __ sll(AT, T1, Address::times_8); + __ addu(AT, T0, AT); + __ daddiu(T0, AT, -8); + + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_2); + __ ld(AT, T3, 0); + __ sd(AT, T0, 0); + __ addiu(T3, T3, -8); + __ addiu(T0, T0, -8); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_2); + __ delayed()->nop(); + + // exit + __ bind(l_4); + if (is_oop) { + gen_write_ref_array_post_barrier(A1, A2, T1); + } + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + //FIXME + address generate_disjoint_long_copy(bool aligned, const char *name) { + Label l_1, l_2; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + __ push(T3); + __ push(T0); + __ push(T1); + __ b(l_2); + __ delayed()->nop(); + __ align(16); + __ bind(l_1); + __ ld(AT, T3, 0); + __ sd (AT, T0, 0); + __ addiu(T3, T3, 8); + __ addiu(T0, T0, 8); + __ bind(l_2); + __ addiu(T1, T1, -1); + __ bgez(T1, l_1); + __ delayed()->nop(); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + + address generate_conjoint_long_copy(bool aligned, const char *name) { + Label l_1, l_2; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + address nooverlap_target = aligned ? 
+ StubRoutines::arrayof_jlong_disjoint_arraycopy() : + StubRoutines::jlong_disjoint_arraycopy(); + array_overlap_test(nooverlap_target, 3); + + __ push(T3); + __ push(T0); + __ push(T1); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + __ sll(AT, T1, Address::times_8); + __ addu(AT, T3, AT); + __ daddiu(T3, AT, -8); + __ sll(AT, T1, Address::times_8); + __ addu(AT, T0, AT); + __ daddiu(T0, AT, -8); + + __ b(l_2); + __ delayed()->nop(); + __ align(16); + __ bind(l_1); + __ ld(AT, T3, 0); + __ sd (AT, T0, 0); + __ addiu(T3, T3, -8); + __ addiu(T0, T0,-8); + __ bind(l_2); + __ addiu(T1, T1, -1); + __ bgez(T1, l_1); + __ delayed()->nop(); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + void generate_arraycopy_stubs() { + if (UseCompressedOops) { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, + "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, + "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, + "oop_disjoint_arraycopy_uninit", true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, + "oop_arraycopy_uninit", true); + } else { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, + "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, + "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, + "oop_disjoint_arraycopy_uninit", true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, + "oop_arraycopy_uninit", true); + } + + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); + + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); + StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); + + // We don't generate specialized code for HeapWord-aligned source + // arrays, so just use the code we've already generated + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; + StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; + + StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; + StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; + + StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; + StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; + + StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; + StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; + StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; + + 
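A note on the UseCompressedOops split above: with compressed oops every reference in an object array is a 32-bit narrowOop, so the int-flavoured copy stubs can be reused for oop arrays; otherwise each element is a full 64-bit pointer and the long-flavoured stubs are used. A trivial sketch of that size choice (illustrative only, not part of the patch):

  #include <stddef.h>
  #include <stdint.h>

  // Element width the oop arraycopy stubs operate on (illustrative sketch).
  static size_t oop_copy_element_size_sketch(bool use_compressed_oops) {
    return use_compressed_oops ? sizeof(uint32_t)   // narrowOop
                               : sizeof(uint64_t);  // full oop pointer
  }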
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; + StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; + } + + // add a function to implement SafeFetch32 and SafeFetchN + void generate_safefetch(const char* name, int size, address* entry, + address* fault_pc, address* continuation_pc) { + // safefetch signatures: + // int SafeFetch32(int* adr, int errValue); + // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); + // + // arguments: + // A0 = adr + // A1 = errValue + // + // result: + // PPC_RET = *adr or errValue + + StubCodeMark mark(this, "StubRoutines", name); + + // Entry point, pc or function descriptor. + *entry = __ pc(); + + // Load *adr into A1, may fault. + *fault_pc = __ pc(); + switch (size) { + case 4: + // int32_t + __ lw(A1, A0, 0); + break; + case 8: + // int64_t + __ ld(A1, A0, 0); + break; + default: + ShouldNotReachHere(); + } + + // return errValue or *adr + *continuation_pc = __ pc(); + __ addu(V0,A1,R0); + __ jr(RA); + __ delayed()->nop(); + } + + +#undef __ +#define __ masm-> + + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Since we need to preserve callee-saved values (currently + // only for C2, but done for C1 as well) we need a callee-saved oop + // map and therefore have to make these stubs into RuntimeStubs + // rather than BufferBlobs. If the compiler needs all registers to + // be preserved between the fault point and the exception handler + // then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + address generate_throw_exception(const char* name, + address runtime_entry, + bool restore_saved_exception_pc) { + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved. 
+ enum layout { + thread_off, // last_java_sp + S7_off, // callee saved register sp + 1 + S6_off, // callee saved register sp + 2 + S5_off, // callee saved register sp + 3 + S4_off, // callee saved register sp + 4 + S3_off, // callee saved register sp + 5 + S2_off, // callee saved register sp + 6 + S1_off, // callee saved register sp + 7 + S0_off, // callee saved register sp + 8 + FP_off, + ret_address, + framesize + }; + + int insts_size = 2048; + int locs_size = 32; + + // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, + // NULL, NULL, NULL, false, NULL, name, false); + CodeBuffer code (name , insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + + address start = __ pc(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM +#ifndef OPT_THREAD + Register java_thread = TREG; + __ get_thread(java_thread); +#else + Register java_thread = TREG; +#endif + if (restore_saved_exception_pc) { + __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); + } + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog + __ sd(S0, SP, S0_off * wordSize); + __ sd(S1, SP, S1_off * wordSize); + __ sd(S2, SP, S2_off * wordSize); + __ sd(S3, SP, S3_off * wordSize); + __ sd(S4, SP, S4_off * wordSize); + __ sd(S5, SP, S5_off * wordSize); + __ sd(S6, SP, S6_off * wordSize); + __ sd(S7, SP, S7_off * wordSize); + + int frame_complete = __ pc() - start; + // push java thread (becomes first argument of C function) + __ sd(java_thread, SP, thread_off * wordSize); + if (java_thread != A0) + __ move(A0, java_thread); + + // Set up last_Java_sp and last_Java_fp + __ set_last_Java_frame(java_thread, SP, FP, NULL); + // Align stack + __ set64(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); + + // Call runtime + __ call(runtime_entry); + __ delayed()->nop(); + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + oop_maps->add_gc_map(__ offset(), map); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. +#ifndef OPT_THREAD + __ get_thread(java_thread); +#endif + + __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + __ reset_last_Java_frame(java_thread, true); + + // Restore callee save registers. 
This must be done after resetting the Java frame + __ ld(S0, SP, S0_off * wordSize); + __ ld(S1, SP, S1_off * wordSize); + __ ld(S2, SP, S2_off * wordSize); + __ ld(S3, SP, S3_off * wordSize); + __ ld(S4, SP, S4_off * wordSize); + __ ld(S5, SP, S5_off * wordSize); + __ ld(S6, SP, S6_off * wordSize); + __ ld(S7, SP, S7_off * wordSize); + + // discard arguments + __ move(SP, FP); // epilog + __ pop(FP); + // check for pending exceptions +#ifdef ASSERT + Label L; + __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ should_not_reach_here(); + __ bind(L); +#endif //ASSERT + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, + &code, + frame_complete, + framesize, + oop_maps, false); + return stub->entry_point(); + } + + // Initialization + void generate_initial() { + // Generates all stubs and initializes the entry points + + //------------------------------------------------------------- + //----------------------------------------------------------- + // entry points that exist in all platforms + // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller + // than the disadvantage of having a much more complicated generator structure. + // See also comment in stubRoutines.hpp. + StubRoutines::_forward_exception_entry = generate_forward_exception(); + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); + // is referenced by megamorphic call + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); + + StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); + } + + void generate_all() { + // Generates all stubs and initializes the entry points + + // These entry points require SharedInfo::stack0 to be set up in + // non-core builds and need to be relocatable, so they each + // fabricate a RuntimeStub internally. + StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); + + StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); + + StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); + + // entry points that are platform specific + + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); +#ifndef CORE + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); +#endif + + // Safefetch stubs. 
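The SafeFetch stubs registered below return the loaded value, or the caller-supplied error value when the load faults: the VM's signal handler recognizes the recorded fault pc and resumes at the continuation pc, where the error value (still sitting in A1) is moved to V0. A usage sketch, assuming the standard SafeFetch32() wrapper from stubRoutines.hpp (the helper name here is made up):

  // Probe a possibly-unmapped address without crashing the VM (illustrative).
  static int peek_int_sketch(int* addr) {
    // If the load inside the stub faults, -1 is returned instead of *addr.
    return SafeFetch32(addr, -1);
  }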
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, + &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + + if (UseMontgomeryMultiplyIntrinsic) { + StubRoutines::_montgomeryMultiply + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); + } + if (UseMontgomerySquareIntrinsic) { + StubRoutines::_montgomerySquare + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); + } + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + if (all) { + generate_all(); + } else { + generate_initial(); + } + } +}; // end class declaration + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} diff --git a/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp new file mode 100644 index 00000000000..733a48b8897 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" + +// a description of how to extend it, see the stubRoutines.hpp file. + +//find the last fp value +address StubRoutines::gs2::_call_stub_compiled_return = NULL; diff --git a/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp new file mode 100644 index 00000000000..920c08844e1 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. + +static bool returns_to_call_stub(address return_pc){ + return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); +} + +enum platform_dependent_constants { + code_size1 = 20000, // simply increase if too small (assembler will crash if too small) + code_size2 = 40000 // simply increase if too small (assembler will crash if too small) +}; + +class gs2 { + friend class StubGenerator; + friend class VMStructs; + private: + // If we call compiled code directly from the call stub we will + // need to adjust the return back to the call stub to a specialized + // piece of code that can handle compiled results and cleaning the fpu + // stack. The variable holds that location. + static address _call_stub_compiled_return; + +public: + // Call back points for traps in compiled code + static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } + static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } + +}; + +#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP diff --git a/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp new file mode 100644 index 00000000000..a83c3728f87 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP +#define CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP + + protected: + + void generate_fixed_frame(bool native_call); + + // address generate_asm_interpreter_entry(bool synchronized); + +#endif // CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp new file mode 100644 index 00000000000..204f1b2f216 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP +#define CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP + + + protected: + + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreter to get the VM to print out the size. + // Max size with JVMTI + // The sethi() instruction generates lots more instructions when shell + // stack limit is unlimited, so that's why this is much bigger. + const static int InterpreterCodeSize = 500 * K; + +#endif // CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp b/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp new file mode 100644 index 00000000000..0cc5d33070f --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp @@ -0,0 +1,2306 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterGenerator.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateTable.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/methodData.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+
+#define __ _masm->
+
+#define A0 RA0
+#define A1 RA1
+#define A2 RA2
+#define A3 RA3
+#define A4 RA4
+#define A5 RA5
+#define A6 RA6
+#define A7 RA7
+#define T0 RT0
+#define T1 RT1
+#define T2 RT2
+#define T3 RT3
+#define T8 RT8
+#define T9 RT9
+
+#ifndef CC_INTERP
+
+// asm based interpreter deoptimization helpers
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int temps,
+                                         int extra_args,
+                                         int monitors,
+                                         int callee_params,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  // Note: This calculation must exactly parallel the frame setup
+  // in AbstractInterpreterGenerator::generate_method_entry.
+
+  // fixed size of an interpreter frame:
+  int overhead = frame::sender_sp_offset -
+                 frame::interpreter_frame_initial_sp_offset;
+  // Our locals were accounted for by the caller (or last_frame_adjust
+  // on the transition). Since the callee parameters already account
+  // for the callee's params, we only need to account for the extra
+  // locals.
+ int size = overhead + + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + extra_args; + + return size; +} + + +const int Interpreter::return_sentinel = 0xfeedbeed; +const int method_offset = frame::interpreter_frame_method_offset * wordSize; +const int bci_offset = frame::interpreter_frame_bcx_offset * wordSize; +const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + +#ifdef ASSERT + { + Label L; + __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ subu(T1, T1, SP); // T1 = maximal sp for current fp + __ bgez(T1, L); // check if frame is complete + __ delayed()->nop(); + __ stop("interpreter frame not set up"); + __ bind(L); + } +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted + // FIXME: please change the func restore_bcp + // S0 is the conventional register for bcp + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception + // FIXME: why do not pass parameter thread ? + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( + const char* name) { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + __ li(A1, (long)name); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + // setup parameters + __ li(A1, (long)name); + if (pass_oop) { + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); + } else { + __ li(A2, (long)message); + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); + } + // throw exception + __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); + __ delayed()->nop(); + return entry; +} + + +address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { + address entry = __ pc(); + // NULL last_sp until next java call + __ sd(R0,Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + __ dispatch_next(state); + return entry; +} + + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + + address entry = __ pc(); + + // Restore stack bottom in case i2c 
adjusted stack + __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + // and NULL it as marker that sp is now tos until next java call + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + __ restore_bcp(); + __ restore_locals(); + + // mdp: T8 + // ret: FSR + // tmp: T9 + if (state == atos) { + Register mdp = T8; + Register tmp = T9; + __ profile_return_type(mdp, FSR, tmp); + } + + + const Register cache = T9; + const Register index = T3; + __ get_cache_and_index_at_bcp(cache, index, 1, index_size); + + const Register flags = cache; + __ dsll(AT, index, Address::times_ptr); + __ daddu(AT, cache, AT); + __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); + __ dsll(AT, flags, Interpreter::stackElementScale()); + __ daddu(SP, SP, AT); + + __ dispatch_next(state, step); + + return entry; +} + + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, + int step) { + address entry = __ pc(); + // NULL last_sp until next java call + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ restore_bcp(); + __ restore_locals(); + // handle exceptions + { + Label L; + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + __ dispatch_next(state, step); + return entry; +} + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : // fall through + case T_LONG : // fall through + case T_VOID : i = 4; break; + case T_FLOAT : i = 5; break; + case T_DOUBLE : i = 6; break; + case T_OBJECT : // fall through + case T_ARRAY : i = 7; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + + +address TemplateInterpreterGenerator::generate_result_handler_for( + BasicType type) { + address entry = __ pc(); + switch (type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ andi(V0, V0, 0xFFFF); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : /* nothing to do */ break; + case T_FLOAT : /* nothing to do */ break; + case T_DOUBLE : /* nothing to do */ break; + case T_OBJECT : + { + __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ verify_oop(V0); // and verify it + } + break; + default : ShouldNotReachHere(); + } + __ jr(RA); // return from result handler + __ delayed()->nop(); + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for( + TosState state, + address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} + + + +// Helpers for commoning out cases in the various type of method entries. 
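The counter code that follows implements a "sticky" overflow test: the invocation counter is bumped, the (masked) backedge counter is added in, and the sum is compared against the interpreter invocation limit. A plain C++ rendering of the non-tiered path, with illustrative stand-ins for the InvocationCounter constants (not the generated code):

  // Sketch of the non-tiered overflow test in generate_counter_incr() below.
  // The shift/mask/limit values are illustrative; the real ones come from
  // InvocationCounter and CompileThreshold.
  static bool invocation_counter_overflow_sketch(unsigned int* invocation_counter,
                                                 unsigned int  backedge_counter) {
    const unsigned int count_shift     = 3;                        // status bits live below the count
    const unsigned int count_increment = 1u << count_shift;
    const unsigned int count_mask      = ~(count_increment - 1);   // mask out the status bits
    const unsigned int limit           = 10000u << count_shift;    // ~ CompileThreshold, scaled

    *invocation_counter += count_increment;                        // bump the invocation count
    unsigned int sum = *invocation_counter + (backedge_counter & count_mask);
    return sum >= limit;                                           // overflow -> request compilation
  }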
+// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// Rmethod: method +// T3 : invocation counter +// +void InterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { + Label done; + if (TieredCompilation) { + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + Label no_mdo; + if (ProfileInterpreter) { + // Are we profiling? + __ ld(FSR, Address(Rmethod, Method::method_data_offset())); + __ beq(FSR, R0, no_mdo); + __ delayed()->nop(); + // Increment counter in the MDO + const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); + __ beq(R0, R0, done); + __ delayed()->nop(); + } + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(FSR, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + __ get_method_counters(Rmethod, FSR, done); + __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); + __ bind(done); + } else { + const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + + __ get_method_counters(Rmethod, FSR, done); + + if (ProfileInterpreter) { // %%% Merge this into methodDataOop + __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + __ incrementl(T9, 1); + __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + } + // Update standard invocation counters + __ lw(T3, invocation_counter); + __ increment(T3, InvocationCounter::count_increment); + __ sw(T3, invocation_counter); // save invocation count + + __ lw(FSR, backedge_counter); // load backedge counter + __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits + __ andr(FSR, FSR, AT); + + __ daddu(T3, T3, FSR); // add both counters + + if (ProfileInterpreter && profile_method != NULL) { + // Test to see if we should create a method data oop + if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { + __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ lw(AT, AT, 0); + __ slt(AT, T3, AT); + } + + __ bne_far(AT, R0, *profile_method_continue); + __ delayed()->nop(); + + // if no method data exists, go to profile_method + __ test_method_data_pointer(FSR, *profile_method); + } + + if (Assembler::is_simm16(CompileThreshold)) { + __ srl(AT, T3, InvocationCounter::count_shift); + __ slti(AT, AT, CompileThreshold); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); + __ lw(AT, AT, 0); + __ slt(AT, T3, AT); + } + + __ beq_far(AT, R0, *overflow); + __ delayed()->nop(); + __ bind(done); + } +} + +void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { + + // Asm interpreter on entry + // S7 - locals + // S0 - bcp + // Rmethod - method + // FP - interpreter frame + + // On return (i.e. 
jump to entry_point) + // Rmethod - method + // RA - return address of interpreter caller + // tos - the last parameter to Java method + // SP - sender_sp + + + // the bcp is valid if and only if it's not null + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), R0); + __ ld(Rmethod, FP, method_offset); + // Preserve invariant that S0/S7 contain bcp/locals of sender frame + __ b_far(*do_continue); + __ delayed()->nop(); +} + +// See if we've got enough room on the stack for locals plus overhead. +// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. +// +// NOTE: Since the additional locals are also always pushed (wasn't +// obvious in generate_method_entry) so the guard should work for them +// too. +// +// Args: +// T2: number of additional locals this frame needs (what we must check) +// T0: Method* +// +void InterpreterGenerator::generate_stack_overflow_check(void) { + // see if we've got enough room on the stack for locals plus overhead. + // the expression stack grows down incrementally, so the normal guard + // page mechanism will work for that. + // + // Registers live on entry: + // + // T0: Method* + // T2: number of additional locals this frame needs (what we must check) + + // NOTE: since the additional locals are also always pushed (wasn't obvious in + // generate_method_entry) so the guard should work for them too. + // + + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved fp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) + + entry_size; + + const int page_size = os::vm_page_size(); + + Label after_frame_check; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. 
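In C terms, the check emitted just below is roughly the following: frames that add no more than about one page of locals are already covered by the guard pages, while larger frames must verify that the prospective stack pointer stays above the guard zone. A sketch with illustrative parameter names (not the generated code):

  #include <stddef.h>
  #include <stdint.h>

  // Shape of the stack-overflow check below; all sizes in bytes (illustrative).
  static bool interpreter_frame_fits_sketch(size_t extra_locals_bytes, size_t overhead_bytes,
                                            size_t page_size, size_t guard_zone_bytes,
                                            uintptr_t sp, uintptr_t stack_base, size_t stack_size) {
    if (extra_locals_bytes <= page_size - overhead_bytes) {
      return true;  // small frame: the guard-page mechanism already covers it
    }
    // The lowest address the new frame may reach must stay above the usable
    // stack bottom plus the red/yellow guard zone.
    uintptr_t limit = stack_base - stack_size + guard_zone_bytes
                    + extra_locals_bytes + overhead_bytes;
    return sp > limit;
  }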
+ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); + __ slt(AT, AT, T2); + __ beq(AT, R0, after_frame_check); + __ delayed()->nop(); + + // compute sp as if this were going to be the last frame on + // the stack before the red zone +#ifndef OPT_THREAD + Register thread = T1; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + + // locals + overhead, in bytes + __ dsll(T3, T2, Interpreter::stackElementScale()); + __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 + +#ifdef ASSERT + Label stack_base_okay, stack_size_okay; + // verify that thread stack base is non-zero + __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); + __ bne(AT, R0, stack_base_okay); + __ delayed()->nop(); + __ stop("stack base is zero"); + __ bind(stack_base_okay); + // verify that thread stack size is non-zero + __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); + __ bne(AT, R0, stack_size_okay); + __ delayed()->nop(); + __ stop("stack size is zero"); + __ bind(stack_size_okay); +#endif + + // Add stack base to locals and subtract stack size + __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT + __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 + __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT + __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 + + + // add in the redzone and yellow size + __ move(AT, (StackRedPages+StackYellowPages) * page_size); + __ addu(T3, T3, AT); + + // check against the current stack bottom + __ slt(AT, T3, SP); + __ bne(AT, R0, after_frame_check); + __ delayed()->nop(); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. 
+ __ move(SP, Rsender); + assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + + // all done with frame size check + __ bind(after_frame_check); +} + +// Allocate monitor and lock method (asm interpreter) +// Rmethod - Method* +void InterpreterGenerator::lock_method(void) { + // synchronize method + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + +#ifdef ASSERT + { Label L; + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); + __ bne(T0, R0, L); + __ delayed()->nop(); + __ stop("method doesn't need synchronization"); + __ bind(L); + } +#endif // ASSERT + // get synchronization object + { + Label done; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, T0, JVM_ACC_STATIC); + __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); + __ beq(T2, R0, done); + __ delayed()->nop(); + __ ld(T0, Rmethod, in_bytes(Method::const_offset())); + __ ld(T0, T0, in_bytes(ConstMethod::constants_offset())); + __ ld(T0, T0, ConstantPool::pool_holder_offset_in_bytes()); + __ ld(T0, T0, mirror_offset); + __ bind(done); + } + // add space for monitor & lock + __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry + __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // set new monitor block top + __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object + // FIXME: I do not know what lock_object will do and what it will need + __ move(c_rarg0, SP); // object address + __ lock_object(c_rarg0); +} + +// Generate a fixed interpreter frame. This is identical setup for +// interpreted methods and for native methods hence the shared code. +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- T0(sender's sp) + // ... 
+ // [ argument word 0 ] <--- S7 + + // initialize fixed part of activation frame + // sender's sp in Rsender + int i = 0; + int frame_size = 9; +#ifndef CORE + ++frame_size; +#endif + __ daddiu(SP, SP, (-frame_size) * wordSize); + __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address + __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp + __ daddiu(FP, SP, (frame_size - 2) * wordSize); + __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp + __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null + __ sd(LVP, FP, (-++i) * wordSize); // save locals offset + __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop + __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase + __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* +#ifndef CORE + if (ProfileInterpreter) { + Label method_data_continue; + __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); + __ beq(AT, R0, method_data_continue); + __ delayed()->nop(); + __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); + __ bind(method_data_continue); + __ sd(AT, FP, (-++i) * wordSize); + } else { + __ sd(R0, FP, (-++i) * wordSize); + } +#endif // !CORE + + __ ld(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); + __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache + if (native_call) { + __ sd(R0, FP, (-++i) * wordSize); // no bcp + } else { + __ sd(BCP, FP, (-++i) * wordSize); // set bcp + } + __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom + assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); +} + +// End of helpers + +// Various method entries +//------------------------------------------------------------------------------------------------------------------------ +// +// + +// Call an accessor method (assuming it is resolved, otherwise drop +// into vanilla (slow path) entry +address InterpreterGenerator::generate_accessor_entry(void) { + + // Rmethod: Method* + // V0: receiver (preserve for slow entry into asm interpreter) + // Rsender: senderSP must preserved for slow path, set SP to it on fast path + + address entry_point = __ pc(); + Label xreturn_path; + // do fastpath for resolved accessor methods + if (UseFastAccessorMethods) { + Label slow_path; + __ li(T2, SafepointSynchronize::address_of_state()); + __ lw(AT, T2, 0); + __ daddiu(AT, AT, -(SafepointSynchronize::_not_synchronized)); + __ bne(AT, R0, slow_path); + __ delayed()->nop(); + // Code: _aload_0, _(i|a)getfield, _(i|a)return or any rewrites thereof; + // parameter size = 1 + // Note: We can only use this code if the getfield has been resolved + // and if we don't have a null-pointer exception => check for + // these conditions first and use slow path if necessary. + // Rmethod: method + // V0: receiver + + // [ receiver ] <-- sp + __ ld(T0, SP, 0); + + // check if local 0 != NULL and read field + __ beq(T0, R0, slow_path); + __ delayed()->nop(); + __ ld(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); + // read first instruction word and extract bytecode @ 1 and index @ 2 + __ ld(T3, Rmethod, in_bytes(Method::const_offset())); + __ lw(T3, T3, in_bytes(ConstMethod::codes_offset())); + // Shift codes right to get the index on the right. 
+ // The bytecode fetched looks like <0xb4><0x2a> + __ dsrl(T3, T3, 2 * BitsPerByte); + // FIXME: maybe it's wrong + __ dsll(T3, T3, exact_log2(in_words(ConstantPoolCacheEntry::size()))); + __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); + + // T0: local 0 + // Rmethod: method + // V0: receiver - do not destroy since it is needed for slow path! + // T1: scratch use which register instead ? + // T3: constant pool cache index + // T2: constant pool cache + // Rsender: send's sp + // check if getfield has been resolved and read constant pool cache entry + // check the validity of the cache entry by testing whether _indices field + // contains Bytecode::_getfield in b1 byte. + assert(in_words(ConstantPoolCacheEntry::size()) == 4, "adjust shift below"); + + __ dsll(T8, T3, Address::times_8); + __ move(T1, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::indices_offset())); + __ daddu(T1, T8, T1); + __ daddu(T1, T1, T2); + __ lw(T1, T1, 0); + __ dsrl(T1, T1, 2 * BitsPerByte); + __ andi(T1, T1, 0xFF); + __ daddiu(T1, T1, (-1) * Bytecodes::_getfield); + __ bne(T1, R0, slow_path); + __ delayed()->nop(); + + // Note: constant pool entry is not valid before bytecode is resolved + + __ move(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + __ daddu(T1, T1, T8); + __ daddu(T1, T1, T2); + __ lw(AT, T1, 0); + + __ move(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ daddu(T1, T1, T8); + __ daddu(T1, T1, T2); + __ lw(T3, T1, 0); + + Label notByte, notBool, notShort, notChar, notObj; + + // Need to differentiate between igetfield, agetfield, bgetfield etc. + // because they are different sizes. + // Use the type from the constant pool cache + __ srl(T3, T3, ConstantPoolCacheEntry::tos_state_shift); + // Make sure we don't need to mask T3 for tosBits after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + // btos = 0 + __ bne(T3, R0, notByte); + __ delayed()->daddu(T0, T0, AT); + + __ lb(V0, T0, 0); + __ b(xreturn_path); + __ delayed()->nop(); + + //ztos + __ bind(notByte); + __ daddiu(T1, T3, (-1) * ztos); + __ bne(T1, R0, notBool); + __ delayed()->nop(); + __ lb(V0, T0, 0); + __ b(xreturn_path); + __ delayed()->nop(); + + //stos + __ bind(notBool); + __ daddiu(T1, T3, (-1) * stos); + __ bne(T1, R0, notShort); + __ delayed()->nop(); + __ lh(V0, T0, 0); + __ b(xreturn_path); + __ delayed()->nop(); + + //ctos + __ bind(notShort); + __ daddiu(T1, T3, (-1) * ctos); + __ bne(T1, R0, notChar); + __ delayed()->nop(); + __ lhu(V0, T0, 0); + __ b(xreturn_path); + __ delayed()->nop(); + + //atos + __ bind(notChar); + __ daddiu(T1, T3, (-1) * atos); + __ bne(T1, R0, notObj); + __ delayed()->nop(); + //add for compressedoops + __ load_heap_oop(V0, Address(T0, 0)); + __ b(xreturn_path); + __ delayed()->nop(); + + //itos + __ bind(notObj); +#ifdef ASSERT + Label okay; + __ daddiu(T1, T3, (-1) * itos); + __ beq(T1, R0, okay); + __ delayed()->nop(); + __ stop("what type is this?"); + __ bind(okay); +#endif // ASSERT + __ lw(V0, T0, 0); + + __ bind(xreturn_path); + + // _ireturn/_areturn + //FIXME + __ move(SP, Rsender);//FIXME, set sender's fp to SP + __ jr(RA); + __ delayed()->nop(); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + (void) generate_normal_entry(false); + } else { + (void) generate_normal_entry(false); + } + + return entry_point; +} + +// Method entry for java.lang.ref.Reference.get. 
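+// (In short: when G1 is in use the referent load must be followed by an SATB
+//  pre-barrier so the collector still sees the referent; when G1 is not in
+//  use this entry simply reuses the plain accessor entry above.)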
+address InterpreterGenerator::generate_Reference_get_entry(void) { +#if INCLUDE_ALL_GCS + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes:- + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. + // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. + // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_enty. + // + // Rmethod: Method* + + // Rsender: senderSP must preserve for slow path, set SP to it on fast path (Rsender) + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + if (UseG1GC) { + Label slow_path; + + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ld(V0, SP, 0); + + __ beq(V0, R0, slow_path); + __ delayed()->nop(); + + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + + // Load the value of the referent field. + const Address field_address(V0, referent_offset); + __ load_heap_oop(V0, field_address); + + __ push(RA); + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + __ g1_write_barrier_pre(noreg /* obj */, + V0 /* pre_val */, + TREG /* thread */, + Rmethod /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ pop(RA); + + __ jr(RA); + __ delayed()->daddu(SP, Rsender, R0); // set sp to sender sp + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + (void) generate_normal_entry(false); + + return entry; + } +#endif // INCLUDE_ALL_GCS + + // If G1 is not enabled then attempt to go through the accessor entry point + // Reference.get is an accessor + return generate_accessor_entry(); +} + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. 
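+//
+// Roughly, the code below:
+//   1. reads the parameter count and locates the parameters (LVP),
+//   2. pushes two extra slots (oop temp / result handler) plus the fixed frame,
+//   3. runs the signature handler to shuffle the arguments into place,
+//   4. passes JNIEnv (and a mirror handle for static methods),
+//   5. switches the thread to _thread_in_native and calls the native function,
+//   6. switches back through _thread_in_native_trans, honoring safepoints,
+//   7. unboxes an oop result, unlocks if synchronized, notifies JVMTI and returns.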
+address InterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls; + // Rsender: sender's sp + // Rmethod: Method* + address entry_point = __ pc(); + +#ifndef CORE + const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset())); +#endif + + // get parameter size (always needed) + // the size in the java stack + __ ld(V0, Rmethod, in_bytes(Method::const_offset())); + __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); + + // native calls don't need the stack size check since they have no expression stack + // and the arguments are already on the stack and we only add a handful of words + // to the stack + + // Rmethod: Method* + // V0: size of parameters + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + + // for natives the size of locals is zero + + // compute beginning of parameters (S7) + __ dsll(LVP, V0, Address::times_8); + __ daddiu(LVP, LVP, (-1) * wordSize); + __ daddu(LVP, LVP, SP); + + + // add 2 zero-initialized slots for native calls + // 1 slot for native oop temp offset (setup via runtime) + // 1 slot for static native result handler3 (setup via runtime) + __ push2(R0, R0); + + // Layout of frame at this point + // [ method holder mirror ] <--- sp + // [ result type info ] + // [ argument word n-1 ] <--- T0 + // ... + // [ argument word 0 ] <--- LVP + + +#ifndef CORE + if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count +#endif + + // initialize fixed part of activation frame + generate_fixed_frame(true); + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- sender's sp + // ... + // [ argument word 0 ] <--- S7 + + + // make sure method is native & not abstract +#ifdef ASSERT + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(AT, T0, JVM_ACC_NATIVE); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(AT, T0, JVM_ACC_ABSTRACT); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. 
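+  // (The flag is a single byte: it is set to true just below and cleared
+  //  again right after the shadow pages have been banged, at which point
+  //  remove_activation may unlock normally.)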
+ Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(AT, (int)true); + __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; + __ bind(continue_after_compile); +#endif // CORE + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // after method_lock, the layout of frame is as following + // + // [ monitor entry ] <--- sp + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // start execution +#ifdef ASSERT + { + Label L; + __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ delayed()->nop(); + __ stop("broken stack frame setup in interpreter in asm"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + // work registers + const Register method = Rmethod; + //const Register thread = T2; + const Register t = T8; + + __ get_method(method); + __ verify_oop(method); + { + Label L, Lstatic; + __ ld(t,method,in_bytes(Method::const_offset())); + __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); + // MIPS n64 ABI: caller does not reserve space for the register auguments. + // A0 and A1(if needed) + __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, AT, JVM_ACC_STATIC); + __ beq(AT, R0, Lstatic); + __ delayed()->nop(); + __ daddiu(t, t, 1); + __ bind(Lstatic); + __ daddiu(t, t, -7); + __ blez(t, L); + __ delayed()->nop(); + __ dsll(t, t, Address::times_8); + __ dsubu(SP, SP, t); + __ bind(L); + } + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + __ move(AT, SP); + // [ ] <--- sp + // ... (size of parameters - 8 ) + // [ monitor entry ] + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... 
+ // [ argument word 0 ] <--- LVP + + // get signature handler + { + Label L; + __ ld(T9, method, in_bytes(Method::signature_handler_offset())); + __ bne(T9, R0, L); + __ delayed()->nop(); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ ld(T9, method, in_bytes(Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + // FIXME: when change codes in InterpreterRuntime, note this point + // from: begin of parameters + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); + // to: current sp + assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); + // temp: T3 + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); + + __ jalr(T9); + __ delayed()->nop(); + __ get_method(method); + + // + // if native function is static, and its second parameter has type length of double word, + // and first parameter has type length of word, we have to reserve one word + // for the first parameter, according to mips o32 abi. + // if native function is not static, and its third parameter has type length of double word, + // and second parameter has type length of word, we have to reserve one word for the second + // parameter. + // + + + // result handler is in V0 + // set result handler + __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); + +#define FIRSTPARA_SHIFT_COUNT 5 +#define SECONDPARA_SHIFT_COUNT 9 +#define THIRDPARA_SHIFT_COUNT 13 +#define PARA_MASK 0xf + + // pass mirror handle if static call + { + Label L; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ lw(t, method, in_bytes(Method::access_flags_offset())); + __ andi(AT, t, JVM_ACC_STATIC); + __ beq(AT, R0, L); + __ delayed()->nop(); + + // get mirror + __ ld(t, method, in_bytes(Method:: const_offset())); + __ ld(t, t, in_bytes(ConstMethod::constants_offset())); //?? + __ ld(t, t, ConstantPool::pool_holder_offset_in_bytes()); + __ ld(t, t, mirror_offset); + // copy mirror into activation frame + //__ sw(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + // pass handle to mirror + __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ move(A1, t); + __ bind(L); + } + + // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) + // [ ] | + // ... size of parameters(or +1) | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... 
+ // [ argument word 0 ] <--- S7 + + // get native function entry point + { Label L; + __ ld(T9, method, in_bytes(Method::native_function_offset())); + __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ bne(V1, T9, L); + __ delayed()->nop(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ verify_oop(method); + __ ld(T9, method, in_bytes(Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + // native function in T9 +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); + __ move(A0, t); + // [ jni environment ] <--- sp + // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) + // [ ] | + // ... size of parameters | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // set_last_Java_frame_before_call + __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset())); + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + __ li(t, __ pc()); + __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); + __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + + // change thread state +#ifdef ASSERT + { + Label L; + __ lw(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ daddiu(t, t, (-1) * _thread_in_Java); + __ beq(t, R0, L); + __ delayed()->nop(); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif + + __ move(t, _thread_in_native); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); + + // call native method + __ jalr(T9); + __ delayed()->nop(); + // result potentially in V0 or F0 + + + // via _last_native_pc and not via _last_jave_sp + // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. + // If the order changes or anything else is added to the stack the code in + // interpreter_frame_result will have to be changed. + //FIXME, should modify here + // save return value to keep the value from being destroyed by other calls + __ push(dtos); + __ push(ltos); + + // change thread state + __ get_thread(thread); + __ move(t, _thread_in_native_trans); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ sync(); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. 
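+      // (serialize_memory writes to a thread-specific slot of the VM's shared
+      //  serialization page; the VM thread can later protect that page to force
+      //  every mutator thread through a serializing memory fault.)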
+ __ serialize_memory(thread, A0); + } + } + + // check for safepoint operation in progress and/or pending suspend requests + { Label Continue; + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are + // preserved and correspond to the bcp/locals pointers. So we do a runtime call + // by hand. + // + Label L; + __ li(AT, SafepointSynchronize::address_of_state()); + __ lw(AT, AT, 0); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ delayed()->nop(); + __ bind(L); + __ move(A0, thread); + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), + relocInfo::runtime_call_type); + __ delayed()->nop(); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + //add for compressedoops + __ reinit_heapbase(); + __ bind(Continue); + } + + // change thread state + __ move(t, _thread_in_Java); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ reset_last_Java_frame(thread, true); + + // reset handle block + __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); + __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); + + // If result was an oop then unbox and save it in the frame + { + Label no_oop; + //FIXME, addiu only support 16-bit imeditate + __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); + __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); + __ bne(AT, T0, no_oop); + __ delayed()->nop(); + __ pop(ltos); + // Unbox oop result, e.g. JNIHandles::resolve value. + __ resolve_jobject(V0, thread, T9); + __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); + // keep stack depth as expected by pushing oop which will eventually be discarded + __ push(ltos); + __ bind(no_oop); + } + { + Label no_reguard; + __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ move(AT,(int) JavaThread::stack_guard_yellow_disabled); + __ bne(t, AT, no_reguard); + __ delayed()->nop(); + __ pushad(); + __ move(S5_heapbase, SP); + __ move(AT, -StackAlignmentInBytes); + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ move(SP, S5_heapbase); + __ popad(); + //add for compressedoops + __ reinit_heapbase(); + __ bind(no_reguard); + } + // restore BCP to have legal interpreter frame, + // i.e., bci == 0 <=> BCP == code_base() + // Can't call_VM until bcp is within reasonable. + __ get_method(method); // method is junk from thread_in_native to now. + __ verify_oop(method); + __ ld(BCP, method, in_bytes(Method::const_offset())); + __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(t, R0, L); + __ delayed()->nop(); + // Note: At some point we may want to unify this with the code used in + // call_VM_base(); + // i.e., we should use the StubRoutines::forward_exception code. For now this + // doesn't work here because the sp is not correctly set at this point. 
+ __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // do unlocking if necessary + { + Label L; + __ lw(t, method, in_bytes(Method::access_flags_offset())); + __ andi(t, t, JVM_ACC_SYNCHRONIZED); + __ beq(t, R0, L); + // the code below should be shared with interpreter macro assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, + // since this is a synchronized method. However, need + // to check that the object has not been unlocked by + // an explicit monitorexit bytecode. + __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); + // address of first monitor + + __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ bne(t, R0, unlock); + __ delayed()->nop(); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(unlock); + __ unlock_object(c_rarg0); + } + __ bind(L); + } + + // jvmti/jvmpi support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). + __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + + // restore potential result in V0, + // call result handler to restore potential result in ST0 & handle result + + __ pop(ltos); + __ pop(dtos); + + __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); + __ jalr(t); + __ delayed()->nop(); + + + // remove activation + __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp + __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address + __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp + __ jr(RA); + __ delayed()->nop(); + +#ifndef CORE + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + // entry_point is the beginning of this + // function and checks again for compiled code + } +#endif + return entry_point; +} + +// +// Generic interpreted method entry to (asm) interpreter +// +// Layout of frame just at the entry +// +// [ argument word n-1 ] <--- sp +// ... +// [ argument word 0 ] +// assume Method* in Rmethod before call this method. 
+// prerequisites to the generated stub : the callee Method* in Rmethod +// note you must save the caller bcp before call the generated stub +// +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls; + + // Rmethod: Method* + // Rsender: sender 's sp + address entry_point = __ pc(); + + const Address invocation_counter(Rmethod, + in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); + + // get parameter size (always needed) + __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod + __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); + + // Rmethod: Method* + // V0: size of parameters + // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i + // get size of locals in words to T2 + __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); + // T2 = no. of additional locals, locals include parameters + __ dsubu(T2, T2, V0); + + // see if we've got enough room on the stack for locals plus overhead. + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + generate_stack_overflow_check(); + // after this function, the layout of frame does not change + + // compute beginning of parameters (LVP) + __ dsll(LVP, V0, LogBytesPerWord); + __ daddiu(LVP, LVP, (-1) * wordSize); + __ daddu(LVP, LVP, SP); + + // T2 - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ beq(T2, R0, exit); + __ delayed()->nop(); + + __ bind(loop); + __ daddiu(SP, SP, (-1) * wordSize); + __ daddiu(T2, T2, -1); // until everything initialized + __ bne(T2, R0, loop); + __ delayed()->sd(R0, SP, 0); // initialize local variables + + __ bind(exit); + } + + // + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argument word n-1 ] <--- T0? + // ... + // [ argument word 0 ] <--- LVP + + // initialize fixed part of activation frame + + generate_fixed_frame(false); + + + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] <--- fp + // [ return address ] + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + + // make sure method is not native & not abstract +#ifdef ASSERT + __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(T2, AT, JVM_ACC_NATIVE); + __ beq(T2, R0, L); + __ delayed()->nop(); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(T2, AT, JVM_ACC_ABSTRACT); + __ beq(T2, R0, L); + __ delayed()->nop(); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. 
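+  // (Compared with the native entry above: the additional locals have just been
+  //  zero-initialized, a real bcp was stored into the fixed frame, and after the
+  //  counter / profiling bookkeeping below execution continues with
+  //  dispatch_next(vtos) instead of a single native call.)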
+ +#ifndef OPT_THREAD + Register thread = T8; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + __ move(AT, (int)true); + __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + + // mdp : T8 + // tmp1: T9 + // tmp2: T2 + __ profile_parameters_type(T8, T9, T2); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + Label profile_method; + Label profile_method_continue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, + &profile_method, + &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + + Label continue_after_compile; + __ bind(continue_after_compile); + +#endif // CORE + + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + // + if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { Label L; + __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); + __ beq(T2, R0, L); + __ delayed()->nop(); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // layout of frame after lock_method + // [ monitor entry ] <--- sp + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + + // start execution +#ifdef ASSERT + { + Label L; + __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ delayed()->nop(); + __ stop("broken stack frame setup in interpreter in native"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + __ dispatch_next(vtos); + + // invocation counter overflow + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter + __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ get_method(Rmethod); + __ b(profile_method_continue); + __ delayed()->nop(); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + } + + return entry_point; +} + +// Entry points +// +// Here we generate the various kind of entries into the interpreter. +// The two main entry type are generic bytecode methods and native +// call method. These both come in synchronized and non-synchronized +// versions but the frame layout they create is very similar. The +// other method entry types are really just special purpose entries +// that are really entry and interpretation all in one. These are for +// trivial methods like accessor, empty, or special math methods. 
+// +// When control flow reaches any of the entry types for the interpreter +// the following holds -> +// +// Arguments: +// +// Rmethod: Method* +// V0: receiver +// +// +// Stack layout immediately at entry +// +// [ parameter n-1 ] <--- sp +// ... +// [ parameter 0 ] +// [ expression stack ] (caller's java expression stack) + +// Assuming that we don't go to one of the trivial specialized entries +// the stack will look like below when we are ready to execute the +// first bytecode (or call the native routine). The register usage +// will be as the template based interpreter expects (see +// interpreter_mips_64.hpp). +// +// local variables follow incoming parameters immediately; i.e. +// the return address is moved to the end of the locals). +// +// [ monitor entry ] <--- sp +// ... +// [ monitor entry ] +// [ monitor block top ] ( the top monitor entry ) +// [ byte code pointer ] (if native, bcp = 0) +// [ constant pool cache ] +// [ Method* ] +// [ locals offset ] +// [ sender's sp ] +// [ sender's fp ] +// [ return address ] <--- fp +// [ local var m-1 ] +// ... +// [ local var 0 ] +// [ argumnet word n-1 ] <--- ( sender's sp ) +// ... +// [ argument word 0 ] <--- S7 + +address AbstractInterpreterGenerator::generate_method_entry( + AbstractInterpreter::MethodKind kind) { + // determine code generation flags + bool synchronized = false; + address entry_point = NULL; + switch (kind) { + case Interpreter::zerolocals : + break; + case Interpreter::zerolocals_synchronized: + synchronized = true; + break; + case Interpreter::native : + entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); + break; + case Interpreter::native_synchronized : + entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); + break; + case Interpreter::empty : + entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); + break; + case Interpreter::accessor : + entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); + break; + case Interpreter::abstract : + entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); + break; + + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : break; + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_sqrt : + entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; + case Interpreter::java_lang_ref_reference_get: + entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; + default: + fatal(err_msg("unexpected method kind: %d", kind)); + break; + } + if (entry_point) return entry_point; + + return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized); +} + +// These should never be compiled since the interpreter will prefer +// the compiled version to the intrinsic version. 
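+// (That is, can_be_compiled() answers false exactly for the math kinds that
+//  generate_method_entry above either routes to generate_math_entry or leaves
+//  on the normal entry path, so they are never queued for compilation.)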
+bool AbstractInterpreter::can_be_compiled(methodHandle m) { + switch (method_kind(m)) { + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : + return false; + default: + return true; + } +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved fp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; + + const int stub_code = 6; // see generate_call_stub + // return overhead_size + method->max_locals() + method->max_stack() + stub_code; + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return overhead_size + method_stack + stub_code; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + // If interpreter_frame!=NULL, set up the method, locals, and monitors. + // The frame interpreter_frame, if not NULL, is guaranteed to be the + // right size, as determined by a previous call to this method. + // It is also guaranteed to be walkable even though it is in a skeletal state + + // fixed size of an interpreter frame: + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; + +#ifdef ASSERT + if (!EnableInvokeDynamic) { + // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? + // Probably, since deoptimization doesn't work yet. 
+ assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); + } + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp+8 + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); + + //set last sp; + intptr_t* sp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(sp); + // All frames but the initial interpreter frame we fill in have a + // value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. + // + if (extra_locals != 0 && + interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. + __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); + + // V0: exception + // V1: return address/pc that threw exception + __ restore_bcp(); // BCP points to call/send + __ restore_locals(); + + //add for compressedoops + __ reinit_heapbase(); + // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + // expression stack is undefined here + // V0: exception + // BCP: exception bcp + __ verify_oop(V0); + + // expression stack must be empty before entering the VM in case of an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ move(A1, V0); + __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); + // V0: exception handler entry point + // V1: preserved exception oop + // S0: bcp for exception handler + __ push(V1); // push exception which is now the only value on the stack + __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) + __ delayed()->nop(); + + // If the exception is not handled in the current frame the frame is removed and + // the exception is rethrown (i.e. exception continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction which caused + // the exception and the expression stack is empty. Thus, for any VM calls + // at this point, GC will find a legal oop map (with empty expression stack). 
+ + // In current activation + // V0: exception + // BCP: exception bcp + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition indicating that we are + // currently handling popframe, so that call_VMs that may happen later do not trigger new + // popframe handling cycles. +#ifndef OPT_THREAD + Register thread = T2; + __ get_thread(T2); +#else + Register thread = TREG; +#endif + __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + __ ori(T3, T3, JavaThread::popframe_processing_bit); + __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#ifndef CORE + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. + Label caller_not_deoptimized; + __ ld(A0, FP, frame::return_addr_offset * wordSize); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); + __ bne(V0, R0, caller_not_deoptimized); + __ delayed()->nop(); + + // Compute size of arguments for saving when returning to deoptimized caller + __ get_method(A1); + __ verify_oop(A1); + __ ld(A1, A1, in_bytes(Method::const_offset())); + __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); + __ shl(A1, Interpreter::logStackElementSize); + __ restore_locals(); + __ dsubu(A2, LVP, A1); + __ daddiu(A2, A2, wordSize); + // Save these arguments +#ifndef OPT_THREAD + __ get_thread(A0); +#else + __ move(A0, TREG); +#endif + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); + + __ remove_activation(vtos, T9, false, false, false); + + // Inform deoptimization that it is responsible for restoring these arguments +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); + __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + // Continue in deoptimization handler + __ jr(T9); + __ delayed()->nop(); + + __ bind(caller_not_deoptimized); + } +#endif /* !CORE */ + + __ remove_activation(vtos, T3, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Clear the popframe condition flag + // Finish with popframe handling + // A previous I2C followed by a deoptimization might have moved the + // outgoing arguments further up the stack. PopFrame expects the + // mutations to those outgoing arguments to be preserved and other + // constraints basically require this frame to look exactly as + // though it had previously invoked an interpreted activation with + // no space between the top of the expression stack (current + // last_sp) and the top of stack. Rather than force deopt to + // maintain this kind of invariant all the time we call a small + // fixup routine to move the mutated arguments onto the top of our + // expression stack if necessary. 
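+  // (Concretely: the current SP and the frame's saved last_sp are handed to
+  //  InterpreterRuntime::popframe_move_outgoing_args below so the runtime can
+  //  move the mutated outgoing arguments back into place on our expression
+  //  stack.)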
+ __ move(T8, SP); + __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // PC must point into interpreter here + __ set_last_Java_frame(thread, noreg, FP, __ pc()); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); + __ reset_last_Java_frame(thread, true); + // Restore the last_sp and null it out + __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + + + __ move(AT, JavaThread::popframe_inactive); + __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + + // Finish with popframe handling + __ restore_bcp(); + __ restore_locals(); +#ifndef CORE + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } +#endif // !CORE + // Clear the popframe condition flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(AT, JavaThread::popframe_inactive); + __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#if INCLUDE_JVMTI + { + Label L_done; + + __ lbu(AT, BCP, 0); + __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); + __ bne(AT, R0, L_done); + __ delayed()->nop(); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ get_method(T9); + __ ld(T8, LVP, 0); + __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); + + __ beq(T8, R0, L_done); + __ delayed()->nop(); + + __ sd(T8, SP, 0); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence + __ pop(T0); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, T3, false, true, false); + // restore exception + __ get_vm_result(T0, thread); + __ verify_oop(T0); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects + // the following registers set up: + // + // T0: exception + // T1: return address/pc that threw exception + // SP: expression stack of caller + // FP: fp of caller + __ push2(T0, T3); // save exception and return address + __ move(A1, T3); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T9, V0); // save exception handler + __ pop2(V0, V1); // restore return address and exception + + // Note that an "issuing PC" is actually the next PC after the call + __ jr(T9); // jump to exception handler of caller + __ delayed()->nop(); +} + + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ load_earlyret_value(state); + +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address 
cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset())); + // Clear the earlyret state + __ move(AT, JvmtiThreadState::earlyret_inactive); + __ sw(AT, cond_addr); + __ sync(); + + + __ remove_activation(state, T0, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ sync(); + __ jr(T0); + __ delayed()->nop(); + return entry; +} // end of ForceEarlyReturn support + + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop(); + dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop(); + lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop(); + aep =__ pc(); __ push(atos); __ b(L); __ delayed()->nop(); + bep = cep = sep = + iep = __ pc(); __ push(itos); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + + +//----------------------------------------------------------------------------- +// Generation of individual instructions + +// helpers for generate_and_dispatch + + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : TemplateInterpreterGenerator(code) { + generate_all(); // down here so it can be "virtual" +} + +//----------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + // prepare expression stack + __ push(state); // save tosca + + // tos & tos2 + // trace_bytecode need actually 4 args, the last two is tos&tos2 + // this work fine for x86. but mips o32 call convention will store A2-A3 + // to the stack position it think is the tos&tos2 + // when the expression stack have no more than 2 data, error occur. 
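+  // (Restated: SharedRuntime::trace_bytecode takes the two topmost expression
+  //  stack values as its last two arguments, so they are loaded explicitly into
+  //  A2/A3 below rather than relying on the calling convention to find them on
+  //  the stack.)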
+ __ ld(A2, SP, 0); + __ ld(A3, SP, 1 * wordSize); + + // pass arguments & call tracer + __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), RA, A2, A3); + __ move(RA, V0); // make sure return address is not destroyed by pop(state) + + // restore expression stack + __ pop(state); // restore tosca + + // return + __ jr(RA); + __ delayed()->nop(); + + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() { + __ li(T8, (long)&BytecodeCounter::_counter_value); + __ lw(AT, T8, 0); + __ daddiu(AT, AT, 1); + __ sw(AT, T8, 0); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { + __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); + __ lw(AT, T8, 0); + __ daddiu(AT, AT, 1); + __ sw(AT, T8, 0); +} + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { + __ li(T8, (long)&BytecodePairHistogram::_index); + __ lw(T9, T8, 0); + __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); + __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); + __ orr(T9, T9, T8); + __ li(T8, (long)&BytecodePairHistogram::_index); + __ sw(T9, T8, 0); + __ dsll(T9, T9, 2); + __ li(T8, (long)BytecodePairHistogram::_counters); + __ daddu(T8, T8, T9); + __ lw(AT, T8, 0); + __ daddiu(AT, AT, 1); + __ sw(AT, T8, 0); +} + + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. + + address entry = Interpreter::trace_code(t->tos_in()); + assert(entry != NULL, "entry must have been generated"); + __ call(entry, relocInfo::none); + __ delayed()->nop(); + //add for compressedoops + __ reinit_heapbase(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + __ li(T8, long(&BytecodeCounter::_counter_value)); + __ lw(T8, T8, 0); + __ move(AT, StopInterpreterAt); + __ bne(T8, AT, L); + __ delayed()->nop(); + __ brk(5); + __ delayed()->nop(); + __ bind(L); +} +#endif // !PRODUCT +#endif // ! CC_INTERP diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips.hpp b/hotspot/src/cpu/mips/vm/templateTable_mips.hpp new file mode 100644 index 00000000000..d879e6dc924 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateTable_mips.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + + static void prepare_invoke(Register method, Register index, int byte_no, + Bytecodes::Code code); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + static void volatile_barrier(); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp b/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp new file mode 100644 index 00000000000..7415511b99c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp @@ -0,0 +1,4623 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.inline.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "utilities/macros.hpp" + + +#ifndef CC_INTERP + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { + // No mips specific initialization +} + +// Address computation: local variables + +static inline Address iaddress(int n) { + return Address(LVP, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} +static inline Address haddress(int n) { return iaddress(n + 0); } + + +static inline Address at_sp() { return Address(SP, 0); } +static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } +static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } + +// At top of Java expression stack which may be different than sp(). It +// isn't for category 1 objects. 
+static inline Address at_tos () { + Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); + return tos; +} + +static inline Address at_tos_p1() { + return Address(SP, Interpreter::expr_offset_in_bytes(1)); +} + +static inline Address at_tos_p2() { + return Address(SP, Interpreter::expr_offset_in_bytes(2)); +} + +static inline Address at_tos_p3() { + return Address(SP, Interpreter::expr_offset_in_bytes(3)); +} + +// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(BCP, offset); +} + +// Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == V0, "parameter is just for looks"); + switch (barrier) { +#if INCLUDE_ALL_GCS + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != T3) { + __ move(T3, obj.base()); + } + } else { + __ lea(T3, obj); + } + __ g1_write_barrier_pre(T3 /* obj */, + T1 /* pre_val */, + TREG /* thread */, + T9 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + if (val == noreg) { + __ store_heap_oop_null(Address(T3, 0)); + } else { + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; + if (UseCompressedOops) { + new_val = T1; + __ move(new_val, val); + } + __ store_heap_oop(Address(T3, 0), val); + __ g1_write_barrier_post(T3 /* store_adr */, + new_val /* new_val */, + TREG /* thread */, + T9 /* tmp */, + T1 /* tmp2 */); + } + } + break; +#endif // INCLUDE_ALL_GCS + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ store_heap_oop_null(obj); + } else { + __ store_heap_oop(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + __ lea(T9, obj); + __ store_check(T9); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ store_heap_oop_null(obj); + } else { + __ store_heap_oop(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} + +// bytecode folding +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) { + if (!RewriteBytecodes) return; + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. 
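+      // The bytecode fetched from the cache below stays zero until the field is
+      // resolved, so the beq to L_patch_done leaves the slow bytecode in place.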
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); + __ daddiu(bc_reg, R0, bc); + __ beq(tmp_reg, R0, L_patch_done); + __ delayed()->nop(); + } + break; + default: + assert(byte_no == -1, "sanity"); + // the pair bytecodes have already done the load. + if (load_bc_into_bc_reg) { + __ move(bc_reg, bc); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + // if a breakpoint is present we can't rewrite the stream directly + __ lbu(tmp_reg, at_bcp(0)); + __ move(AT, Bytecodes::_breakpoint); + __ bne(tmp_reg, AT, L_fast_patch); + __ delayed()->nop(); + + __ get_method(tmp_reg); + // Let breakpoint table handling rewrite to quicker bytecode + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); + + __ b(L_patch_done); + __ delayed()->nop(); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Label L_okay; + __ lbu(tmp_reg, at_bcp(0)); + __ move(AT, (int)Bytecodes::java_code(bc)); + __ beq(tmp_reg, AT, L_okay); + __ delayed()->nop(); + __ beq(tmp_reg, bc_reg, L_patch_done); + __ delayed()->nop(); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // patch bytecode + __ sb(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} + + +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() { + transition(vtos, atos); + __ move(FSR, R0); +} + +void TemplateTable::iconst(int value) { + transition(vtos, itos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ move(FSR, value); + } +} + +void TemplateTable::lconst(int value) { + transition(vtos, ltos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ move(FSR, value); + } +} + +void TemplateTable::fconst(int value) { + transition(vtos, ftos); + switch( value ) { + case 0: __ mtc1(R0, FSF); return; + case 1: __ addiu(AT, R0, 1); break; + case 2: __ addiu(AT, R0, 2); break; + default: ShouldNotReachHere(); + } + __ mtc1(AT, FSF); + __ cvt_s_w(FSF, FSF); +} + +void TemplateTable::dconst(int value) { + transition(vtos, dtos); + switch( value ) { + case 0: __ dmtc1(R0, FSF); + return; + case 1: __ daddiu(AT, R0, 1); + __ dmtc1(AT, FSF); + __ cvt_d_w(FSF, FSF); + break; + default: ShouldNotReachHere(); + } +} + +void TemplateTable::bipush() { + transition(vtos, itos); + __ lb(FSR, at_bcp(1)); +} + +void TemplateTable::sipush() { + transition(vtos, itos); + __ lb(FSR, BCP, 1); + __ lbu(AT, BCP, 2); + __ dsll(FSR, FSR, 8); + __ orr(FSR, FSR, AT); +} + +// T1 : tags +// T2 : index +// T3 : cpool +// T8 : tag +void TemplateTable::ldc(bool wide) { + transition(vtos, vtos); + Label call_ldc, notFloat, notClass, Done; + // get index in cpool + if (wide) { + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + } else { + __ lbu(T2, at_bcp(1)); + } + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { + __ gslbx(T1, T1, T2, tags_offset); + } else { + __ daddu(AT, T1, T2); + __ lb(T1, AT, tags_offset); + } + if(os::is_MP()) { + __ sync(); // load acquire + } + //now T1 is the tag + + // unresolved class - get the resolved 
class + __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); + __ beq(AT, R0, call_ldc); + __ delayed()->nop(); + + // unresolved class in error (resolution failed) - call into runtime + // so that the same error from first resolution attempt is thrown. + __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); + __ beq(AT, R0, call_ldc); + __ delayed()->nop(); + + // resolved class - need to call vm to get java mirror of the class + __ daddiu(AT, T1, - JVM_CONSTANT_Class); + __ bne(AT, R0, notClass); + __ delayed()->dsll(T2, T2, Address::times_8); + + __ bind(call_ldc); + __ move(A1, wide); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); + //__ push(atos); + __ daddiu(SP, SP, - Interpreter::stackElementSize); + __ b(Done); + __ delayed()->sd(FSR, SP, 0); // added for performance issue + + __ bind(notClass); + __ daddiu(AT, T1, -JVM_CONSTANT_Float); + __ bne(AT, R0, notFloat); + __ delayed()->nop(); + // ftos + if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { + __ gslwxc1(FSF, T3, T2, base_offset); + } else { + __ daddu(AT, T3, T2); + __ lwc1(FSF, AT, base_offset); + } + //__ push_f(); + __ daddiu(SP, SP, - Interpreter::stackElementSize); + __ b(Done); + __ delayed()->swc1(FSF, SP, 0); + + __ bind(notFloat); +#ifdef ASSERT + { + Label L; + __ daddiu(AT, T1, -JVM_CONSTANT_Integer); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("unexpected tag type in ldc"); + __ bind(L); + } +#endif + // itos JVM_CONSTANT_Integer only + if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { + __ gslwx(FSR, T3, T2, base_offset); + } else { + __ daddu(T0, T3, T2); + __ lw(FSR, T0, base_offset); + } + __ push(itos); + __ bind(Done); +} + +// Fast path for caching oop constants. +void TemplateTable::fast_aldc(bool wide) { + transition(vtos, atos); + + Register result = FSR; + Register tmp = SSR; + int index_size = wide ? sizeof(u2) : sizeof(u1); + + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (String, MethodType, etc.) 
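+  // A null entry means this is the first execution: fall through and let
+  // InterpreterRuntime::resolve_ldc fill the resolved reference cache.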
+ assert_different_registers(result, tmp); + __ get_cache_index_at_bcp(tmp, 1, index_size); + __ load_resolved_reference_at_index(result, tmp); + __ bne(result, R0, resolved); + __ delayed()->nop(); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + // first time invocation - must resolve first + int i = (int)bytecode(); + __ move(tmp, i); + __ call_VM(result, entry, tmp); + + __ bind(resolved); + + if (VerifyOops) { + __ verify_oop(result); + } +} + + +// used register: T2, T3, T1 +// T2 : index +// T3 : cpool +// T1 : tag +void TemplateTable::ldc2_w() { + transition(vtos, vtos); + Label Long, Done; + + // get index in cpool + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type in T1 + if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { + __ gslbx(T1, T1, T2, tags_offset); + } else { + __ daddu(AT, T1, T2); + __ lb(T1, AT, tags_offset); + } + + __ daddiu(AT, T1, - JVM_CONSTANT_Double); + __ bne(AT, R0, Long); + __ delayed()->dsll(T2, T2, Address::times_8); + + // dtos + if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { + __ gsldxc1(FSF, T3, T2, base_offset); + } else { + __ daddu(AT, T3, T2); + __ ldc1(FSF, AT, base_offset); + } + __ push(dtos); + __ b(Done); + __ delayed()->nop(); + + // ltos + __ bind(Long); + if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { + __ gsldx(FSR, T3, T2, base_offset); + } else { + __ daddu(AT, T3, T2); + __ ld(FSR, AT, base_offset); + } + __ push(ltos); + + __ bind(Done); +} + +// we compute the actual local variable address here +// the x86 dont do so for it has scaled index memory access model, we dont have, so do here +void TemplateTable::locals_index(Register reg, int offset) { + __ lbu(reg, at_bcp(offset)); + __ dsll(reg, reg, Address::times_8); + __ dsubu(reg, LVP, reg); +} + +// this method will do bytecode folding of the two form: +// iload iload iload caload +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::iload() { + transition(vtos, itos); + if (RewriteFrequentPairs) { + Label rewrite, done; + // get the next bytecode in T2 + __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ move(AT, Bytecodes::_iload); + __ beq(AT, T2, done); + __ delayed()->nop(); + + __ move(T3, Bytecodes::_fast_iload2); + __ move(AT, Bytecodes::_fast_iload); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // if _caload, rewrite to fast_icaload + __ move(T3, Bytecodes::_fast_icaload); + __ move(AT, Bytecodes::_caload); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // rewrite so iload doesn't check again. 
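+    // Default: the next bytecode is neither an iload nor a caload, so a plain
+    // _fast_iload is sufficient.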
+ __ move(T3, Bytecodes::_fast_iload); + + // rewrite + // T3 : fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, T3, T2, false); + __ bind(done); + } + + // Get the local value into tos + locals_index(T2); + __ lw(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload2() { + transition(vtos, itos); + locals_index(T2); + __ lw(FSR, T2, 0); + __ push(itos); + locals_index(T2, 3); + __ lw(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload() { + transition(vtos, itos); + locals_index(T2); + __ lw(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::lload() { + transition(vtos, ltos); + locals_index(T2); + __ ld(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::fload() { + transition(vtos, ftos); + locals_index(T2); + __ lwc1(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::dload() { + transition(vtos, dtos); + locals_index(T2); + __ ldc1(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::aload() { + transition(vtos, atos); + locals_index(T2); + __ ld(FSR, T2, 0); +} + +void TemplateTable::locals_index_wide(Register reg) { + __ get_unsigned_2_byte_index_at_bcp(reg, 2); + __ dsll(reg, reg, Address::times_8); + __ dsubu(reg, LVP, reg); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(T2); + __ ld(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_lload() { + transition(vtos, ltos); + locals_index_wide(T2); + __ ld(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_fload() { + transition(vtos, ftos); + locals_index_wide(T2); + __ lwc1(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_dload() { + transition(vtos, dtos); + locals_index_wide(T2); + __ ldc1(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_aload() { + transition(vtos, atos); + locals_index_wide(T2); + __ ld(FSR, T2, 0); +} + +// we use A2 as the regiser for index, BE CAREFUL! 
+// we dont use our tge 29 now, for later optimization +void TemplateTable::index_check(Register array, Register index) { + // Pop ptr into array + __ pop_ptr(array); + index_check_without_pop(array, index); +} + +void TemplateTable::index_check_without_pop(Register array, Register index) { + // destroys A2 + // check array + __ null_check(array, arrayOopDesc::length_offset_in_bytes()); + + // sign extend since tos (index) might contain garbage in upper bits + __ sll(index, index, 0); + + // check index + Label ok; + __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); +#ifndef OPT_RANGECHECK + __ sltu(AT, index, AT); + __ bne(AT, R0, ok); + __ delayed()->nop(); + + //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 + if (A2 != index) __ move(A2, index); + __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ delayed()->nop(); + __ bind(ok); +#else + __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); + __ move(A2, index); + __ tgeu(A2, AT, 29); +#endif +} + +void TemplateTable::iaload() { + transition(itos, itos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ dsll(FSR, FSR, 2); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, 2); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); + + __ gslwle(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(FSR, FSR, 2); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_INT), 8)) { + __ gslwx(FSR, FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); + } else { + __ daddu(FSR, SSR, FSR); + __ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); + } + } +} + +void TemplateTable::laload() { + transition(itos, ltos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ dsll(FSR, FSR, Address::times_8); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, Address::times_8); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); + + __ gsldle(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(AT, FSR, Address::times_8); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_LONG), 8)) { + __ gsldx(FSR, SSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG)); + } else { + __ daddu(AT, SSR, AT); + __ ld(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG)); + } + } +} + +void TemplateTable::faload() { + transition(itos, ftos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ shl(FSR, 2); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ shl(AT, 2); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + + __ gslwlec1(FSF, FSR, AT); + } else { + index_check(SSR, FSR); + __ shl(FSR, 2); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 8)) { + __ gslwxc1(FSF, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + } else { + __ daddu(FSR, SSR, FSR); + __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + } + } +} + +void TemplateTable::daload() { + transition(itos, dtos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array 
FSR: index + __ dsll(FSR, FSR, 3); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, 3); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); + + __ gsldlec1(FSF, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(AT, FSR, 3); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 8)) { + __ gsldxc1(FSF, SSR, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); + } else { + __ daddu(AT, SSR, AT); + __ ldc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); + } + } +} + +void TemplateTable::aaload() { + transition(itos, atos); + index_check(SSR, FSR); + __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); + __ daddu(FSR, SSR, FSR); + //add for compressedoops + __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +void TemplateTable::baload() { + transition(itos, itos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR:index + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound + + __ gslble(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_BYTE), 8)) { + __ gslbx(FSR, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } else { + __ daddu(FSR, SSR, FSR); + __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } + } +} + +void TemplateTable::caload() { + transition(itos, itos); + index_check(SSR, FSR); + __ dsll(FSR, FSR, Address::times_2); + __ daddu(FSR, SSR, FSR); + __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + +// iload followed by caload frequent pair +// used register : T2 +// T2 : index +void TemplateTable::fast_icaload() { + transition(vtos, itos); + // load index out of locals + locals_index(T2); + __ lw(FSR, T2, 0); + index_check(SSR, FSR); + __ dsll(FSR, FSR, 1); + __ daddu(FSR, SSR, FSR); + __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + +void TemplateTable::saload() { + transition(itos, itos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ dsll(FSR, FSR, Address::times_2); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, Address::times_2); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); + + __ gslhle(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(FSR, FSR, Address::times_2); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_SHORT), 8)) { + __ gslhx(FSR, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); + } else { + __ daddu(FSR, SSR, FSR); + __ lh(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); + } + } +} + +void TemplateTable::iload(int n) { + transition(vtos, itos); + __ lw(FSR, iaddress(n)); +} + +void TemplateTable::lload(int n) { + transition(vtos, ltos); + __ ld(FSR, laddress(n)); +} + +void TemplateTable::fload(int n) { + transition(vtos, ftos); + __ lwc1(FSF, faddress(n)); +} + +void TemplateTable::dload(int n) { + transition(vtos, dtos); + __ ldc1(FSF, laddress(n)); +} + +void 
TemplateTable::aload(int n) { + transition(vtos, atos); + __ ld(FSR, aaddress(n)); +} + +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::aload_0() { + transition(vtos, atos); + // According to bytecode histograms, the pairs: + // + // _aload_0, _fast_igetfield + // _aload_0, _fast_agetfield + // _aload_0, _fast_fgetfield + // + // occur frequently. If RewriteFrequentPairs is set, the (slow) + // _aload_0 bytecode checks if the next bytecode is either + // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then + // rewrites the current bytecode into a pair bytecode; otherwise it + // rewrites the current bytecode into _fast_aload_0 that doesn't do + // the pair check anymore. + // + // Note: If the next bytecode is _getfield, the rewrite must be + // delayed, otherwise we may miss an opportunity for a pair. + // + // Also rewrite frequent pairs + // aload_0, aload_1 + // aload_0, iload_1 + // These bytecodes with a small amount of code are most profitable + // to rewrite + if (RewriteFrequentPairs) { + Label rewrite, done; + // get the next bytecode in T2 + __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // do actual aload_0 + aload(0); + + // if _getfield then wait with rewrite + __ move(AT, Bytecodes::_getfield); + __ beq(AT, T2, done); + __ delayed()->nop(); + + // if _igetfield then reqrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_iaccess_0); + __ move(AT, Bytecodes::_fast_igetfield); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // if _agetfield then reqrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_aaccess_0); + __ move(AT, Bytecodes::_fast_agetfield); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // if _fgetfield then reqrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_faccess_0); + __ move(AT, Bytecodes::_fast_fgetfield); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_aload_0); + + // rewrite + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, T3, T2, false); + + __ bind(done); + } else { + aload(0); + } +} + +void TemplateTable::istore() { + transition(itos, vtos); + locals_index(T2); + __ sw(FSR, T2, 0); +} + +void TemplateTable::lstore() { + transition(ltos, vtos); + locals_index(T2); + __ sd(FSR, T2, -wordSize); +} + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(T2); + __ swc1(FSF, T2, 0); +} + +void TemplateTable::dstore() { + transition(dtos, vtos); + locals_index(T2); + __ sdc1(FSF, T2, -wordSize); +} + +void TemplateTable::astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index(T2); + __ sd(FSR, T2, 0); +} + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(FSR); + locals_index_wide(T2); + __ sd(FSR, T2, 0); +} + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(FSR); + locals_index_wide(T2); + __ sd(FSR, T2, -wordSize); +} + +void TemplateTable::wide_fstore() { + wide_istore(); +} + +void TemplateTable::wide_dstore() { + wide_lstore(); +} + +void 
TemplateTable::wide_astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index_wide(T2); + __ sd(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(SSR); // T2: array SSR: index + if(UseBoundCheckInstruction) { + __ pop_ptr(T2); + __ dsll(SSR, SSR, Address::times_4); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_4); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound + + __ gsswle(FSR, SSR, AT); + } else { + index_check(T2, SSR); // prefer index in SSR + __ dsll(SSR, SSR, Address::times_4); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_INT), 8)) { + __ gsswx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); + } else { + __ daddu(T2, T2, SSR); + __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT)); + } + } +} + + + +// used register T2, T3 +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i (T2); + if(UseBoundCheckInstruction) { + __ pop_ptr(T3); + __ dsll(T2, T2, Address::times_8); + __ daddu(T2, T3, T2); + __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base + + __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_8); + __ daddu(AT, T3, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound + + __ gssdle(FSR, T2, AT); + } else { + index_check(T3, T2); + __ dsll(T2, T2, Address::times_8); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_LONG), 8)) { + __ gssdx(FSR, T3, T2, arrayOopDesc::base_offset_in_bytes(T_LONG)); + } else { + __ daddu(T3, T3, T2); + __ sd(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG)); + } + } +} + +// used register T2 +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(SSR); + if(UseBoundCheckInstruction) { + __ pop_ptr(T2); + __ dsll(SSR, SSR, Address::times_4); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_4); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound + + __ gsswlec1(FSF, SSR, AT); + } else { + index_check(T2, SSR); + __ dsll(SSR, SSR, Address::times_4); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 8)) { + __ gsswxc1(FSF, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + } else { + __ daddu(T2, T2, SSR); + __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + } + } +} + +// used register T2, T3 +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i (T2); + if(UseBoundCheckInstruction) { + __ pop_ptr(T3); + __ dsll(T2, T2, Address::times_8); + __ daddu(T2, T3, T2); + __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base + + __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_8); + __ daddu(AT, T3, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound + + __ gssdlec1(FSF, T2, AT); + } else { + index_check(T3, T2); + __ dsll(T2, T2, Address::times_8); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 8)) { + __ gssdxc1(FSF, T3, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); + } else { + __ 
daddu(T3, T3, T2); + __ sdc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); + } + } +} + +// used register : T2, T3, T8 +// T2 : array +// T3 : subklass +// T8 : supklass +void TemplateTable::aastore() { + Label is_null, ok_is_subtype, done; + transition(vtos, vtos); + // stack: ..., array, index, value + __ ld(FSR, at_tos()); // Value + __ lw(SSR, at_tos_p1()); // Index + __ ld(T2, at_tos_p2()); // Array + + // index_check(T2, SSR); + index_check_without_pop(T2, SSR); + // do array store check - check for NULL value first + __ beq(FSR, R0, is_null); + __ delayed()->nop(); + + // Move subklass into T3 + //add for compressedoops + __ load_klass(T3, FSR); + // Move superklass into T8 + //add for compressedoops + __ load_klass(T8, T2); + __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); + // Compress array+index*4+12 into a single register. T2 + __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); + __ daddu(T2, T2, AT); + __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + // Generate subtype check. + // Superklass in T8. Subklass in T3. + __ gen_subtype_check(T8, T3, ok_is_subtype); + // Come here on failure + // object is at FSR + __ jmp(Interpreter::_throw_ArrayStoreException_entry); + __ delayed()->nop(); + // Come here on success + __ bind(ok_is_subtype); + do_oop_store(_masm, Address(T2, 0), FSR, _bs->kind(), true); + __ b(done); + __ delayed()->nop(); + + // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] + __ bind(is_null); + __ profile_null_seen(T9); + __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); + __ daddu(T2, T2, AT); + do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, _bs->kind(), true); + + __ bind(done); + __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); +} + +void TemplateTable::bastore() { + transition(itos, vtos); + __ pop_i(SSR); + if(UseBoundCheckInstruction) { + guarantee(false, "unimplemented yet!"); + __ pop_ptr(T2); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound + + __ gssble(FSR, SSR, AT); + } else { + index_check(T2, SSR); + + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. 
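+    // If the layout_helper diffbit is set this is a boolean array, so keep only
+    // the low bit of the value before storing.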
+ __ load_klass(T9, T2); + __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); + + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ move(AT, diffbit); + + Label L_skip; + __ andr(AT, T9, AT); + __ beq(AT, R0, L_skip); + __ delayed()->nop(); + __ andi(FSR, FSR, 0x1); + __ bind(L_skip); + + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_BYTE), 8)) { + __ gssbx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } else { + __ daddu(SSR, T2, SSR); + __ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } + } +} + +void TemplateTable::castore() { + transition(itos, vtos); + __ pop_i(SSR); + if(UseBoundCheckInstruction) { + __ pop_ptr(T2); + __ dsll(SSR, SSR, Address::times_2); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_2); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound + + __ gsshle(FSR, SSR, AT); + } else { + index_check(T2, SSR); + __ dsll(SSR, SSR, Address::times_2); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_CHAR), 8)) { + __ gsshx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); + } else { + __ daddu(SSR, T2, SSR); + __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); + } + } +} + +void TemplateTable::sastore() { + castore(); +} + +void TemplateTable::istore(int n) { + transition(itos, vtos); + __ sw(FSR, iaddress(n)); +} + +void TemplateTable::lstore(int n) { + transition(ltos, vtos); + __ sd(FSR, laddress(n)); +} + +void TemplateTable::fstore(int n) { + transition(ftos, vtos); + __ swc1(FSF, faddress(n)); +} + +void TemplateTable::dstore(int n) { + transition(dtos, vtos); + __ sdc1(FSF, laddress(n)); +} + +void TemplateTable::astore(int n) { + transition(vtos, vtos); + __ pop_ptr(FSR); + __ sd(FSR, aaddress(n)); +} + +void TemplateTable::pop() { + transition(vtos, vtos); + __ daddiu(SP, SP, Interpreter::stackElementSize); +} + +void TemplateTable::pop2() { + transition(vtos, vtos); + __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); +} + +void TemplateTable::dup() { + transition(vtos, vtos); + // stack: ..., a + __ load_ptr(0, FSR); + __ push_ptr(FSR); + // stack: ..., a, a +} + +// blows FSR +void TemplateTable::dup_x1() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(0, FSR); // load b + __ load_ptr(1, A5); // load a + __ store_ptr(1, FSR); // store b + __ store_ptr(0, A5); // store a + __ push_ptr(FSR); // push b + // stack: ..., b, a, b +} + +// blows FSR +void TemplateTable::dup_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, FSR); // load c + __ load_ptr(2, A5); // load a + __ store_ptr(2, FSR); // store c in a + __ push_ptr(FSR); // push c + // stack: ..., c, b, c, c + __ load_ptr(2, FSR); // load b + __ store_ptr(2, A5); // store a in b + // stack: ..., c, a, c, c + __ store_ptr(1, FSR); // store b in c + // stack: ..., c, a, b, c +} + +// blows FSR +void TemplateTable::dup2() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(1, FSR); // load a + __ push_ptr(FSR); // push a + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + // stack: ..., a, b, a, b +} + +// blows FSR +void TemplateTable::dup2_x1() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, T2); // load c + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + __ push_ptr(T2); // push c + // stack: ..., a, b, c, b, c + __ 
store_ptr(3, T2); // store c in b + // stack: ..., a, c, c, b, c + __ load_ptr(4, T2); // load a + __ store_ptr(2, T2); // store a in 2nd c + // stack: ..., a, c, a, b, c + __ store_ptr(4, FSR); // store b in a + // stack: ..., b, c, a, b, c + + // stack: ..., b, c, a, b, c +} + +// blows FSR, SSR +void TemplateTable::dup2_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c, d + // stack: ..., a, b, c, d + __ load_ptr(0, T2); // load d + __ load_ptr(1, FSR); // load c + __ push_ptr(FSR); // push c + __ push_ptr(T2); // push d + // stack: ..., a, b, c, d, c, d + __ load_ptr(4, FSR); // load b + __ store_ptr(2, FSR); // store b in d + __ store_ptr(4, T2); // store d in b + // stack: ..., a, d, c, b, c, d + __ load_ptr(5, T2); // load a + __ load_ptr(3, FSR); // load c + __ store_ptr(3, T2); // store a in c + __ store_ptr(5, FSR); // store c in a + // stack: ..., c, d, a, b, c, d + + // stack: ..., c, d, a, b, c, d +} + +// blows FSR +void TemplateTable::swap() { + transition(vtos, vtos); + // stack: ..., a, b + + __ load_ptr(1, A5); // load a + __ load_ptr(0, FSR); // load b + __ store_ptr(0, A5); // store a in b + __ store_ptr(1, FSR); // store b in a + + // stack: ..., b, a +} + +void TemplateTable::iop2(Operation op) { + transition(itos, itos); + + __ pop_i(SSR); + switch (op) { + case add : __ addu32(FSR, SSR, FSR); break; + case sub : __ subu32(FSR, SSR, FSR); break; + case mul : __ mul(FSR, SSR, FSR); break; + case _and : __ andr(FSR, SSR, FSR); break; + case _or : __ orr(FSR, SSR, FSR); break; + case _xor : __ xorr(FSR, SSR, FSR); break; + case shl : __ sllv(FSR, SSR, FSR); break; + case shr : __ srav(FSR, SSR, FSR); break; + case ushr : __ srlv(FSR, SSR, FSR); break; + default : ShouldNotReachHere(); + } +} + +// the result stored in FSR, SSR, +// used registers : T2, T3 +void TemplateTable::lop2(Operation op) { + transition(ltos, ltos); + __ pop_l(T2); + + switch (op) { + case add : __ daddu(FSR, T2, FSR); break; + case sub : __ dsubu(FSR, T2, FSR); break; + case _and: __ andr(FSR, T2, FSR); break; + case _or : __ orr(FSR, T2, FSR); break; + case _xor: __ xorr(FSR, T2, FSR); break; + default : ShouldNotReachHere(); + } +} + +// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, +// the result is 0x80000000 +// the godson2 cpu do the same, so we need not handle this specially like x86 +void TemplateTable::idiv() { + transition(itos, itos); + Label not_zero; + + __ bne(FSR, R0, not_zero); + __ delayed()->nop(); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + __ bind(not_zero); + + __ pop_i(SSR); + if (UseLEXT1) { + __ gsdiv(FSR, SSR, FSR); + } else { + __ div(SSR, FSR); + __ mflo(FSR); + } +} + +void TemplateTable::irem() { + transition(itos, itos); + Label not_zero; + __ pop_i(SSR); + __ div(SSR, FSR); + + __ bne(FSR, R0, not_zero); + __ delayed()->nop(); + //__ brk(7); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + + __ bind(not_zero); + __ mfhi(FSR); +} + +void TemplateTable::lmul() { + transition(ltos, ltos); + __ pop_l(T2); + if (UseLEXT1) { + __ gsdmult(FSR, T2, FSR); + } else { + __ dmult(T2, FSR); + __ mflo(FSR); + } +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::ldiv() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + __ delayed()->nop(); + + //__ brk(7); //generate FPE + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + + __ bind(normal); + __ pop_l(A2); + if 
(UseLEXT1) { + __ gsddiv(FSR, A2, FSR); + } else { + __ ddiv(A2, FSR); + __ mflo(FSR); + } +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::lrem() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + __ delayed()->nop(); + + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + + __ bind(normal); + __ pop_l (A2); + + if (UseLEXT1) { + __ gsdmod(FSR, A2, FSR); + } else { + __ ddiv(A2, FSR); + __ mfhi(FSR); + } +} + +// result in FSR +// used registers : T0 +void TemplateTable::lshl() { + transition(itos, ltos); + __ pop_l(T0); + __ dsllv(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lshr() { + transition(itos, ltos); + __ pop_l(T0); + __ dsrav(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lushr() { + transition(itos, ltos); + __ pop_l(T0); + __ dsrlv(FSR, T0, FSR); +} + +// result in FSF +void TemplateTable::fop2(Operation op) { + transition(ftos, ftos); + switch (op) { + case add: + __ lwc1(FTF, at_sp()); + __ add_s(FSF, FTF, FSF); + break; + case sub: + __ lwc1(FTF, at_sp()); + __ sub_s(FSF, FTF, FSF); + break; + case mul: + __ lwc1(FTF, at_sp()); + __ mul_s(FSF, FTF, FSF); + break; + case div: + __ lwc1(FTF, at_sp()); + __ div_s(FSF, FTF, FSF); + break; + case rem: + __ mov_s(F13, FSF); + __ lwc1(F12, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); + break; + default : ShouldNotReachHere(); + } + + __ daddiu(SP, SP, 1 * wordSize); +} + +// result in SSF||FSF +// i dont handle the strict flags +void TemplateTable::dop2(Operation op) { + transition(dtos, dtos); + switch (op) { + case add: + __ ldc1(FTF, at_sp()); + __ add_d(FSF, FTF, FSF); + break; + case sub: + __ ldc1(FTF, at_sp()); + __ sub_d(FSF, FTF, FSF); + break; + case mul: + __ ldc1(FTF, at_sp()); + __ mul_d(FSF, FTF, FSF); + break; + case div: + __ ldc1(FTF, at_sp()); + __ div_d(FSF, FTF, FSF); + break; + case rem: + __ mov_d(F13, FSF); + __ ldc1(F12, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); + break; + default : ShouldNotReachHere(); + } + + __ daddiu(SP, SP, 2 * wordSize); +} + +void TemplateTable::ineg() { + transition(itos, itos); + __ subu32(FSR, R0, FSR); +} + +void TemplateTable::lneg() { + transition(ltos, ltos); + __ dsubu(FSR, R0, FSR); +} + +void TemplateTable::fneg() { + transition(ftos, ftos); + __ neg_s(FSF, FSF); +} + +void TemplateTable::dneg() { + transition(dtos, dtos); + __ neg_d(FSF, FSF); +} + +// used registers : T2 +void TemplateTable::iinc() { + transition(vtos, vtos); + locals_index(T2); + __ lw(FSR, T2, 0); + __ lb(AT, at_bcp(2)); // get constant + __ daddu(FSR, FSR, AT); + __ sw(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::wide_iinc() { + transition(vtos, vtos); + locals_index_wide(T2); + __ get_2_byte_integer_at_bcp(FSR, AT, 4); + __ hswap(FSR); + __ lw(AT, T2, 0); + __ daddu(FSR, AT, FSR); + __ sw(FSR, T2, 0); +} + +void TemplateTable::convert() { + // Checking +#ifdef ASSERT + { + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: 
// fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + } +#endif // ASSERT + + // Conversion + switch (bytecode()) { + case Bytecodes::_i2l: + __ sll(FSR, FSR, 0); + break; + case Bytecodes::_i2f: + __ mtc1(FSR, FSF); + __ cvt_s_w(FSF, FSF); + break; + case Bytecodes::_i2d: + __ mtc1(FSR, FSF); + __ cvt_d_w(FSF, FSF); + break; + case Bytecodes::_i2b: + __ seb(FSR, FSR); + break; + case Bytecodes::_i2c: + __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits + break; + case Bytecodes::_i2s: + __ seh(FSR, FSR); + break; + case Bytecodes::_l2i: + __ sll(FSR, FSR, 0); + break; + case Bytecodes::_l2f: + __ dmtc1(FSR, FSF); + __ cvt_s_l(FSF, FSF); + break; + case Bytecodes::_l2d: + __ dmtc1(FSR, FSF); + __ cvt_d_l(FSF, FSF); + break; + case Bytecodes::_f2i: + { + Label L; + + __ trunc_w_s(F12, FSF); + __ move(AT, 0x7fffffff); + __ mfc1(FSR, F12); + __ c_un_s(FSF, FSF); //NaN? + __ movt(FSR, R0); + + __ bne(AT, FSR, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, FSF); + __ andr(AT, AT, T9); + + __ movn(FSR, T9, AT); + + __ bind(L); + } + break; + case Bytecodes::_f2l: + { + Label L; + + __ trunc_l_s(F12, FSF); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(FSR, F12); + __ c_un_s(FSF, FSF); //NaN? + __ movt(FSR, R0); + + __ bne(AT, FSR, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, FSF); + __ andr(AT, AT, T9); + + __ dsll32(T9, T9, 0); + __ movn(FSR, T9, AT); + + __ bind(L); + } + break; + case Bytecodes::_f2d: + __ cvt_d_s(FSF, FSF); + break; + case Bytecodes::_d2i: + { + Label L; + + __ trunc_w_d(F12, FSF); + __ move(AT, 0x7fffffff); + __ mfc1(FSR, F12); + + __ bne(FSR, AT, L); + __ delayed()->mtc1(R0, F12); + + __ cvt_d_w(F12, F12); + __ c_ult_d(FSF, F12); + __ bc1f(L); + __ delayed()->addiu(T9, R0, -1); + + __ c_un_d(FSF, FSF); //NaN? + __ subu32(FSR, T9, AT); + __ movt(FSR, R0); + + __ bind(L); + } + break; + case Bytecodes::_d2l: + { + Label L; + + __ trunc_l_d(F12, FSF); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(FSR, F12); + + __ bne(FSR, AT, L); + __ delayed()->mtc1(R0, F12); + + __ cvt_d_w(F12, F12); + __ c_ult_d(FSF, F12); + __ bc1f(L); + __ delayed()->daddiu(T9, R0, -1); + + __ c_un_d(FSF, FSF); //NaN? 
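+        // c_un_d set the FP condition flag for a NaN input; movt then forces the
+        // result to 0, as required for d2l of NaN.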
+ __ subu(FSR, T9, AT); + __ movt(FSR, R0); + + __ bind(L); + } + break; + case Bytecodes::_d2f: + __ cvt_s_d(FSF, FSF); + break; + default : + ShouldNotReachHere(); + } +} + +void TemplateTable::lcmp() { + transition(ltos, itos); + + Label low, high, done; + __ pop(T0); + __ pop(R0); + __ slt(AT, T0, FSR); + __ bne(AT, R0, low); + __ delayed()->nop(); + + __ bne(T0, FSR, high); + __ delayed()->nop(); + + __ li(FSR, (long)0); + __ b(done); + __ delayed()->nop(); + + __ bind(low); + __ li(FSR, (long)-1); + __ b(done); + __ delayed()->nop(); + + __ bind(high); + __ li(FSR, (long)1); + __ b(done); + __ delayed()->nop(); + + __ bind(done); +} + +void TemplateTable::float_cmp(bool is_float, int unordered_result) { + Label less, done; + + __ move(FSR, R0); + + if (is_float) { + __ lwc1(FTF, at_sp()); + __ c_eq_s(FTF, FSF); + __ bc1t(done); + __ delayed()->daddiu(SP, SP, 1 * wordSize); + + if (unordered_result<0) + __ c_ult_s(FTF, FSF); + else + __ c_olt_s(FTF, FSF); + } else { + __ ldc1(FTF, at_sp()); + __ c_eq_d(FTF, FSF); + __ bc1t(done); + __ delayed()->daddiu(SP, SP, 2 * wordSize); + + if (unordered_result<0) + __ c_ult_d(FTF, FSF); + else + __ c_olt_d(FTF, FSF); + } + __ bc1t(less); + __ delayed()->nop(); + __ move(FSR, 1); + __ b(done); + __ delayed()->nop(); + __ bind(less); + __ move(FSR, -1); + __ bind(done); +} + + +// used registers : T3, A7, Rnext +// FSR : return bci, this is defined by the vm specification +// T2 : MDO taken count +// T3 : method +// A7 : offset +// Rnext : next bytecode, this is required by dispatch_base +void TemplateTable::branch(bool is_jsr, bool is_wide) { + __ get_method(T3); + __ profile_taken_branch(A7, T2); // only C2 meaningful + + const ByteSize be_offset = MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset(); + const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset(); + + // Load up T4 with the branch displacement + if (!is_wide) { + __ lb(A7, BCP, 1); + __ lbu(AT, BCP, 2); + __ dsll(A7, A7, 8); + __ orr(A7, A7, AT); + } else { + __ get_4_byte_integer_at_bcp(A7, AT, 1); + __ swap(A7); + } + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the non-JSR + // normal-branch stuff occuring below. 
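+  // At this point A7 holds the signed branch displacement and BCP still points
+  // at the branch bytecode.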
+ if (is_jsr) { + // Pre-load the next target bytecode into Rnext + __ daddu(AT, BCP, A7); + __ lbu(Rnext, AT, 0); + + // compute return address as bci in FSR + __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); + __ ld(AT, T3, in_bytes(Method::const_offset())); + __ dsubu(FSR, FSR, AT); + // Adjust the bcp in BCP by the displacement in A7 + __ daddu(BCP, BCP, A7); + // jsr returns atos that is not an oop + // Push return address + __ push_i(FSR); + // jsr returns vtos + __ dispatch_only_noverify(vtos); + + return; + } + + // Normal (non-jsr) branch handling + + // Adjust the bcp in S0 by the displacement in T4 + __ daddu(BCP, BCP, A7); + + assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; + Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches + // T3: method + // T4: target offset + // BCP: target bcp + // LVP: locals pointer + __ bgtz(A7, dispatch); // check if forward or backward branch + __ delayed()->nop(); + + // check if MethodCounters exists + Label has_counters; + __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ bne(AT, R0, has_counters); + __ delayed()->nop(); + __ push(T3); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), + T3); + __ pop(T3); + __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ beq(AT, R0, dispatch); + __ delayed()->nop(); + __ bind(has_counters); + + if (TieredCompilation) { + Label no_mdo; + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + if (ProfileInterpreter) { + // Are we profiling? 
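+        // T3 holds the Method*; a null MDO means no profiling data has been
+        // allocated for this method yet.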
+ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); + __ beq(T0, R0, no_mdo); + __ delayed()->nop(); + // Increment the MDO backedge counter + const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + __ beq(R0, R0, dispatch); + __ delayed()->nop(); + } + __ bind(no_mdo); + // Increment backedge counter in MethodCounters* + __ ld(T0, Address(T3, Method::method_counters_offset())); + __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + if (!UseOnStackReplacement) { + __ bind(backedge_counter_overflow); + } + } else { + // increment back edge counter + __ ld(T1, T3, in_bytes(Method::method_counters_offset())); + __ lw(T0, T1, in_bytes(be_offset)); + __ increment(T0, InvocationCounter::count_increment); + __ sw(T0, T1, in_bytes(be_offset)); + + // load invocation counter + __ lw(T1, T1, in_bytes(inv_offset)); + // buffer bit added, mask no needed + + // daddu backedge counter & invocation counter + __ daddu(T1, T1, T0); + + if (ProfileInterpreter) { + // Test to see if we should create a method data oop + // T1 : backedge counter & invocation counter + if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { + __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ lw(AT, AT, 0); + __ slt(AT, T1, AT); + } + + __ bne(AT, R0, dispatch); + __ delayed()->nop(); + + // if no method data exists, go to profile method + __ test_method_data_pointer(T1, profile_method); + + if (UseOnStackReplacement) { + if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { + __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ lw(AT, AT, 0); + __ slt(AT, T2, AT); + } + + __ bne(AT, R0, dispatch); + __ delayed()->nop(); + + // When ProfileInterpreter is on, the backedge_count comes + // from the methodDataOop, which value does not get reset on + // the call to frequency_counter_overflow(). + // To avoid excessive calls to the overflow routine while + // the method is being compiled, daddu a second test to make + // sure the overflow function is called only once every + // overflow_frequency. + const int overflow_frequency = 1024; + __ andi(AT, T2, overflow_frequency-1); + __ beq(AT, R0, backedge_counter_overflow); + __ delayed()->nop(); + } + } else { + if (UseOnStackReplacement) { + // check for overflow against AT, which is the sum of the counters + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ lw(AT, AT, 0); + __ slt(AT, T1, AT); + __ beq(AT, R0, backedge_counter_overflow); + __ delayed()->nop(); + } + } + } + __ bind(dispatch); + } + + // Pre-load the next target bytecode into Rnext + __ lbu(Rnext, BCP, 0); + + // continue with the bytecode @ target + // FSR: return bci for jsr's, unused otherwise + // Rnext: target bytecode + // BCP: target bcp + __ dispatch_only(vtos); + + if (UseLoopCounter) { + if (ProfileInterpreter) { + // Out-of-line code to allocate method data oop. 
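+      // Reached from test_method_data_pointer above when the profile limit was
+      // hit but no MDO exists yet.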
+ __ bind(profile_method); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ lbu(Rnext, BCP, 0); + __ set_method_data_pointer_for_bcp(); + __ b(dispatch); + __ delayed()->nop(); + } + + if (UseOnStackReplacement) { + // invocation counter overflow + __ bind(backedge_counter_overflow); + __ subu(A7, BCP, A7); // branch bcp + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), A7); + __ lbu(Rnext, BCP, 0); + + // V0: osr nmethod (osr ok) or NULL (osr not possible) + // V1: osr adapter frame return address + // Rnext: target bytecode + // LVP: locals pointer + // BCP: bcp + __ beq(V0, R0, dispatch); + __ delayed()->nop(); + // nmethod may have been invalidated (VM may block upon call_VM return) + __ lw(T3, V0, nmethod::entry_bci_offset()); + __ move(AT, InvalidOSREntryBci); + __ beq(AT, T3, dispatch); + __ delayed()->nop(); + // We need to prepare to execute the OSR method. First we must + // migrate the locals and monitors off of the stack. + //V0: osr nmethod (osr ok) or NULL (osr not possible) + //V1: osr adapter frame return address + //Rnext: target bytecode + //LVP: locals pointer + //BCP: bcp + __ move(BCP, V0); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + + // V0 is OSR buffer, move it to expected parameter location + // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp + __ move(T0, V0); + + // pop the interpreter frame + __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); + //FIXME, shall we keep the return address on the stack? + __ leave(); // remove frame anchor + __ move(LVP, RA); + __ move(SP, A7); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP , SP , AT); + + // push the (possibly adjusted) return address + //refer to osr_entry in c1_LIRAssembler_mips.cpp + __ ld(AT, BCP, nmethod::osr_entry_point_offset()); + __ jr(AT); + __ delayed()->nop(); + } + } +} + + +void TemplateTable::if_0cmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + case less: + __ bgez(FSR, not_taken); + break; + case less_equal: + __ bgtz(FSR, not_taken); + break; + case greater: + __ blez(FSR, not_taken); + break; + case greater_equal: + __ bltz(FSR, not_taken); + break; + } + __ delayed()->nop(); + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_icmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + + __ pop_i(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + case less: + __ slt(AT, SSR, FSR); + __ beq(AT, R0, not_taken); + break; + case less_equal: + __ slt(AT, FSR, SSR); + __ bne(AT, R0, not_taken); + break; + case greater: + __ slt(AT, FSR, SSR); + __ beq(AT, R0, not_taken); + break; + case greater_equal: + __ slt(AT, SSR, FSR); + __ bne(AT, R0, not_taken); + break; + } + __ delayed()->nop(); + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_nullcmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use 
backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + default: + ShouldNotReachHere(); + } + __ delayed()->nop(); + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + + +void TemplateTable::if_acmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + // __ lw(SSR, SP, 0); + __ pop_ptr(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + default: + ShouldNotReachHere(); + } + __ delayed()->nop(); + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci +void TemplateTable::ret() { + transition(vtos, vtos); + + locals_index(T2); + __ ld(T2, T2, 0); + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld(BCP, T1, in_bytes(Method::const_offset())); + __ daddu(BCP, BCP, T2); + __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci +void TemplateTable::wide_ret() { + transition(vtos, vtos); + + locals_index_wide(T2); + __ ld(T2, T2, 0); // get return bci, compute return bcp + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld(BCP, T1, in_bytes(Method::const_offset())); + __ daddu(BCP, BCP, T2); + __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos); +} + +// used register T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : low +// A7 : high +// Rnext : dest bytecode, required by dispatch_base +void TemplateTable::tableswitch() { + Label default_case, continue_execution; + transition(itos, vtos); + + // align BCP + __ daddiu(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // load lo & hi + __ lw(T3, T2, 1 * BytesPerInt); + __ swap(T3); + __ lw(A7, T2, 2 * BytesPerInt); + __ swap(A7); + + // check against lo & hi + __ slt(AT, FSR, T3); + __ bne(AT, R0, default_case); + __ delayed()->nop(); + + __ slt(AT, A7, FSR); + __ bne(AT, R0, default_case); + __ delayed()->nop(); + + // lookup dispatch offset, in A7 big endian + __ dsubu(FSR, FSR, T3); + __ dsll(AT, FSR, Address::times_4); + __ daddu(AT, T2, AT); + __ lw(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(FSR, T9, T3); + + __ bind(continue_execution); + __ swap(A7); + __ daddu(BCP, BCP, A7); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos); + + // handle default + __ bind(default_case); + __ profile_switch_default(FSR); + __ lw(A7, T2, 0); + __ b(continue_execution); + __ delayed()->nop(); +} + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +// used registers : T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : pair index +// A7 : offset +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + + // swap FSR so we can avoid swapping the table entries + __ swap(FSR); + + // align BCP + __ daddiu(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // set counter + __ lw(T3, T2, BytesPerInt); + __ swap(T3); + __ b(loop_entry); + __ delayed()->nop(); + + 
// table search + __ bind(loop); + // get the entry value + __ dsll(AT, T3, Address::times_8); + __ daddu(AT, T2, AT); + __ lw(AT, AT, 2 * BytesPerInt); + + // found? + __ beq(FSR, AT, found); + __ delayed()->nop(); + + __ bind(loop_entry); + __ bgtz(T3, loop); + __ delayed()->daddiu(T3, T3, -1); + + // default case + __ profile_switch_default(FSR); + __ lw(A7, T2, 0); + __ b(continue_execution); + __ delayed()->nop(); + + // entry found -> get offset + __ bind(found); + __ dsll(AT, T3, Address::times_8); + __ daddu(AT, T2, AT); + __ lw(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(T3, FSR, T2); + + // continue execution + __ bind(continue_execution); + __ swap(A7); + __ daddu(BCP, BCP, A7); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos); +} + +// used registers : T0, T1, T2, T3, A7, Rnext +// T2 : pairs address(array) +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int n) { + // // Binary search according to "Methodik des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. + // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (inexisting) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // register allocation + const Register array = T2; + const Register i = T3, j = A7; + const Register h = T1; + const Register temp = T0; + const Register key = FSR; + + // setup array + __ daddiu(array, BCP, 3*BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(array, array, AT); + + // initialize i & j + __ move(i, R0); + __ lw(j, array, - 1 * BytesPerInt); + // Convert j into native byteordering + __ swap(j); + + // and start + Label entry; + __ b(entry); + __ delayed()->nop(); + + // binary search loop + { + Label loop; + __ bind(loop); + // int h = (i + j) >> 1; + __ daddu(h, i, j); + __ dsrl(h, h, 1); + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // Convert array[h].match to native byte-ordering before compare + __ dsll(AT, h, Address::times_8); + __ daddu(AT, array, AT); + __ lw(temp, AT, 0 * BytesPerInt); + __ swap(temp); + + { + Label set_i, end_of_if; + __ slt(AT, key, temp); + __ beq(AT, R0, set_i); + __ delayed()->nop(); + + __ b(end_of_if); + __ delayed(); __ move(j, h); + + __ bind(set_i); + __ move(i, h); + + __ bind(end_of_if); + } + // while (i+1 < j) + __ bind(entry); + __ daddiu(h, i, 1); + __ slt(AT, h, j); + __ bne(AT, R0, loop); + __ delayed()->nop(); + } + + // end of binary search, result index is i (must check again!) 
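+  // Note on the re-check that follows: the loop invariant only guarantees
+  // array[i].match <= key < array[i+1].match (or that key is below every match
+  // value), so array[i].match must still be compared against key to tell a real
+  // hit from the default case. Sketch of the tail of the search (illustrative
+  // only, mirroring the assembly below):
+  //
+  //   if (key == array[i].fast_match())  use the matched entry's offset;  // entry found
+  //   else                               use the default offset;          // default_case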
+ Label default_case; + // Convert array[i].match to native byte-ordering before compare + __ dsll(AT, i, Address::times_8); + __ daddu(AT, array, AT); + __ lw(temp, AT, 0 * BytesPerInt); + __ swap(temp); + __ bne(key, temp, default_case); + __ delayed()->nop(); + + // entry found -> j = offset + __ dsll(AT, i, Address::times_8); + __ daddu(AT, array, AT); + __ lw(j, AT, 1 * BytesPerInt); + __ profile_switch_case(i, key, array); + __ swap(j); + + __ daddu(BCP, BCP, j); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos); + + // default case -> j = default offset + __ bind(default_case); + __ profile_switch_default(i); + __ lw(j, array, - 2 * BytesPerInt); + __ swap(j); + __ daddu(BCP, BCP, j); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos); +} + +void TemplateTable::_return(TosState state) { + transition(state, state); + assert(_desc->calls_vm(), + "inconsistent calls_vm information"); // call in remove_activation + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + __ ld(T1, aaddress(0)); + __ load_klass(LVP, T1); + __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); + __ move(AT, JVM_ACC_HAS_FINALIZER); + __ andr(AT, AT, LVP); + Label skip_register_finalizer; + __ beq(AT, R0, skip_register_finalizer); + __ delayed()->nop(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::register_finalizer), T1); + __ bind(skip_register_finalizer); + } + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. + if (state == itos) { + __ narrow(FSR); + } + + __ remove_activation(state, T9); + __ sync(); + + __ jr(T9); + __ delayed()->nop(); +} + +// ---------------------------------------------------------------------------- +// Volatile variables demand their effects be made known to all CPU's +// in order. Store buffers on most chips allow reads & writes to +// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode +// without some kind of memory barrier (i.e., it's not sufficient that +// the interpreter does not reorder volatile references, the hardware +// also must not reorder them). +// +// According to the new Java Memory Model (JMM): +// (1) All volatiles are serialized wrt to each other. ALSO reads & +// writes act as aquire & release, so: +// (2) A read cannot let unrelated NON-volatile memory refs that +// happen after the read float up to before the read. It's OK for +// non-volatile memory refs that happen before the volatile read to +// float down below it. +// (3) Similar a volatile write cannot let unrelated NON-volatile +// memory refs that happen BEFORE the write float down to after the +// write. It's OK for non-volatile memory refs that happen after the +// volatile write to float up before it. +// +// We only put in barriers around volatile refs (they are expensive), +// not _between_ memory refs (that would require us to track the +// flavor of the previous memory refs). Requirements (2) and (3) +// require some barriers before volatile stores and after volatile +// loads. These nearly cover requirement (1) but miss the +// volatile-store-volatile-load case. This final case is placed after +// volatile-stores although it could just as well go before +// volatile-loads. 
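+//
+// As a sketch (not generated code) of how the field accessors below use
+// volatile_barrier(): for a field flagged volatile, getfield/getstatic and
+// putfield/putstatic each emit
+//
+//   volatile_barrier();  <access the field>;  volatile_barrier();
+//
+// i.e. a full sync on both sides of the access, which conservatively covers
+// requirements (1) through (3) above, including the volatile-store /
+// volatile-load case.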
+void TemplateTable::volatile_barrier() { + if(os::is_MP()) __ sync(); +} + +// we dont shift left 2 bits in get_cache_and_index_at_bcp +// for we always need shift the index we use it. the ConstantPoolCacheEntry +// is 16-byte long, index is the index in +// ConstantPoolCache, so cache + base_offset() + index * 16 is +// the corresponding ConstantPoolCacheEntry +// used registers : T2 +// NOTE : the returned index need also shift left 4 to get the address! +void TemplateTable::resolve_cache_and_index(int byte_no, + Register Rcache, + Register index, + size_t index_size) { + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + const Register temp = A1; + assert_different_registers(Rcache, index); + + Label resolved; + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); + // is resolved? + int i = (int)bytecode(); + __ addiu(temp, temp, -i); + __ beq(temp, R0, resolved); + __ delayed()->nop(); + // resolve first time through + address entry; + switch (bytecode()) { + case Bytecodes::_getstatic : // fall through + case Bytecodes::_putstatic : // fall through + case Bytecodes::_getfield : // fall through + case Bytecodes::_putfield : + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); + break; + case Bytecodes::_invokevirtual : // fall through + case Bytecodes::_invokespecial : // fall through + case Bytecodes::_invokestatic : // fall through + case Bytecodes::_invokeinterface: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); + break; + case Bytecodes::_invokehandle: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); + break; + case Bytecodes::_invokedynamic: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); + break; + default : + fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); + break; + } + + __ move(temp, i); + __ call_VM(NOREG, entry, temp); + + // Update registers with resolved info + __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); + __ bind(resolved); +} + +// The Rcache and index registers must be set before call +void TemplateTable::load_field_cp_cache_entry(Register obj, + Register cache, + Register index, + Register off, + Register flags, + bool is_static = false) { + assert_different_registers(cache, index, flags, off); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + // Field offset + __ dsll(AT, index, Address::times_ptr); + __ daddu(AT, cache, AT); + __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); + // Flags + __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); + + // klass overwrite register + if (is_static) { + __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld(obj, Address(obj, mirror_offset)); + + __ verify_oop(obj); + } +} + +// get the method, itable_index and flags of the current invoke +void TemplateTable::load_invoke_cp_cache_entry(int byte_no, + Register method, + Register itable_index, + Register flags, + bool is_invokevirtual, + bool is_invokevfinal, /*unused*/ + bool is_invokedynamic) { + // setup registers + const Register cache = T3; + const Register index = T1; + assert_different_registers(method, flags); + assert_different_registers(method, cache, index); + assert_different_registers(itable_index, flags); + assert_different_registers(itable_index, cache, index); + 
assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); + // determine constant pool cache field offsets + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + ((byte_no == f2_byte) + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + // access constant pool cache fields + const int index_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()); + + size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + + //assert(wordSize == 8, "adjust code below"); + // note we shift 4 not 2, for we get is the true inde + // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version + __ dsll(AT, index, Address::times_ptr); + __ daddu(AT, cache, AT); + __ ld(method, AT, method_offset); + + if (itable_index != NOREG) { + __ ld(itable_index, AT, index_offset); + } + __ ld(flags, AT, flags_offset); +} + +// The registers cache and index expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, Register index, + bool is_static, bool has_tos) { + // do the JVMTI work here to avoid disturbing the register state below + // We use c_rarg registers here because we want to use the register used in + // the call to the VM + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label L1; + // kill FSR + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + assert_different_registers(cache, index, AT); + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ lw(AT, AT, 0); + __ beq(AT, R0, L1); + __ delayed()->nop(); + + __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); + + // cache entry pointer + __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); + __ shl(tmp3, LogBytesPerWord); + __ daddu(tmp2, tmp2, tmp3); + if (is_static) { + __ move(tmp1, R0); + } else { + __ ld(tmp1, SP, 0); + __ verify_oop(tmp1); + } + // tmp1: object pointer or NULL + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::pop_and_check_object(Register r) { + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. 
+ __ verify_oop(r); +} + +// used registers : T1, T2, T3, T1 +// T1 : flags +// T2 : off +// T3 : obj +// T1 : field address +// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the +// following mapping to the TosState states: +// btos: 0 +// ctos: 1 +// stos: 2 +// itos: 3 +// ltos: 4 +// ftos: 5 +// dtos: 6 +// atos: 7 +// vtos: 8 +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::getfield_or_static(int byte_no, bool is_static) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + { + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + if (!is_static) pop_and_check_object(obj); + __ daddu(index, obj, off); + + + Label Done, notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + __ delayed()->nop(); + + // btos + __ lb(FSR, index, 0); + __ push(btos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + + __ bind(notByte); + __ move(AT, ztos); + __ bne(flags, AT, notBool); + __ delayed()->nop(); + + // ztos + __ lb(FSR, index, 0); + __ push(ztos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + + __ bind(notBool); + __ move(AT, itos); + __ bne(flags, AT, notInt); + __ delayed()->nop(); + + // itos + __ lw(FSR, index, 0); + __ push(itos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notInt); + __ move(AT, atos); + __ bne(flags, AT, notObj); + __ delayed()->nop(); + + // atos + //add for compressedoops + __ load_heap_oop(FSR, Address(index, 0)); + __ push(atos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notObj); + __ move(AT, ctos); + __ bne(flags, AT, notChar); + __ delayed()->nop(); + + // ctos + __ lhu(FSR, index, 0); + __ push(ctos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notChar); + __ move(AT, stos); + __ bne(flags, AT, notShort); + __ delayed()->nop(); + + // stos + __ lh(FSR, index, 0); + __ push(stos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notShort); + __ move(AT, ltos); + __ bne(flags, AT, notLong); + __ delayed()->nop(); + + // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 + // ltos + __ ld(FSR, index, 0 * wordSize); + __ push(ltos); + + // Don't rewrite to _fast_lgetfield for potential volatile case. 
+ __ b(Done); + __ delayed()->nop(); + + __ bind(notLong); + __ move(AT, ftos); + __ bne(flags, AT, notFloat); + __ delayed()->nop(); + + // ftos + __ lwc1(FSF, index, 0); + __ push(ftos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notFloat); + __ move(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); + __ delayed()->nop(); +#endif + + // dtos + __ ldc1(FSF, index, 0 * wordSize); + __ push(dtos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); + } + + +#ifdef ASSERT + __ b(Done); + __ delayed()->nop(); + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + + +void TemplateTable::getfield(int byte_no) { + getfield_or_static(byte_no, false); +} + +void TemplateTable::getstatic(int byte_no) { + getfield_or_static(byte_no, true); +} + +// The registers cache and index expected to be set before call. +// The function may destroy various registers, just not the cache and index registers. +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { + transition(vtos, vtos); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L1; + //kill AT, T1, T2, T3, T9 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T9; + assert_different_registers(cache, index, tmp4); + + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ lw(AT, AT, 0); + __ beq(AT, R0, L1); + __ delayed()->nop(); + + __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); + + if (is_static) { + __ move(tmp1, R0); + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though; it + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. 
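+      // Illustration (not generated code) of the layout being decoded below:
+      // for a two-word value such as a long or double the expression stack is
+      //
+      //   SP -> [ value (2 slots) ][ objectref ] ...
+      //
+      // so the object sits at SP + Interpreter::expr_offset_in_bytes(2); for a
+      // one-word value it sits at SP + Interpreter::expr_offset_in_bytes(1).
+      // That is what the two_word / valsize_known paths compute into tmp1.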
+ Label two_word, valsize_known; + __ dsll(AT, tmp4, Address::times_8); + __ daddu(AT, tmp2, AT); + __ ld(tmp3, AT, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset())); + __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); + + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ move(tmp1, SP); + __ move(AT, ltos); + __ beq(tmp3, AT, two_word); + __ delayed()->nop(); + __ move(AT, dtos); + __ beq(tmp3, AT, two_word); + __ delayed()->nop(); + __ b(valsize_known); + __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); + + __ bind(two_word); + __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); + + __ bind(valsize_known); + // setup object pointer + __ ld(tmp1, tmp1, 0*wordSize); + } + // cache entry pointer + __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); + __ shl(tmp4, LogBytesPerWord); + __ daddu(tmp2, tmp2, tmp4); + // object (tos) + __ move(tmp3, SP); + // tmp1: object pointer set up above (NULL if static) + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +// used registers : T0, T1, T2, T3, T8 +// T1 : flags +// T2 : off +// T3 : obj +// T8 : volatile bit +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::putfield_or_static(int byte_no, bool is_static) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + const Register bc = T3; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + Label Done; + { + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + + Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + + // btos + __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + __ delayed()->nop(); + + __ pop(btos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sb(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ztos + __ bind(notByte); + __ move(AT, ztos); + __ bne(flags, AT, notBool); + __ delayed()->nop(); + + __ pop(ztos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ andi(FSR, FSR, 0x1); + __ sb(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // itos + __ bind(notBool); + __ move(AT, itos); + __ bne(flags, AT, notInt); + __ delayed()->nop(); + + __ pop(itos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sw(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // atos + __ bind(notInt); + __ move(AT, atos); + __ bne(flags, AT, notObj); + __ 
delayed()->nop(); + + __ pop(atos); + if (!is_static) { + pop_and_check_object(obj); + } + + do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR, _bs->kind(), false); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ctos + __ bind(notObj); + __ move(AT, ctos); + __ bne(flags, AT, notChar); + __ delayed()->nop(); + + __ pop(ctos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sh(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // stos + __ bind(notChar); + __ move(AT, stos); + __ bne(flags, AT, notShort); + __ delayed()->nop(); + + __ pop(stos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sh(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ltos + __ bind(notShort); + __ move(AT, ltos); + __ bne(flags, AT, notLong); + __ delayed()->nop(); + + __ pop(ltos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sd(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ftos + __ bind(notLong); + __ move(AT, ftos); + __ bne(flags, AT, notFloat); + __ delayed()->nop(); + + __ pop(ftos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ swc1(FSF, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + + // dtos + __ bind(notFloat); + __ move(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); + __ delayed()->nop(); +#endif + + __ pop(dtos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sdc1(FSF, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); + } + +#ifdef ASSERT + __ b(Done); + __ delayed()->nop(); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + +void TemplateTable::putfield(int byte_no) { + putfield_or_static(byte_no, false); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +// used registers : T1, T2, T3 +// T1 : cp_entry +// T2 : obj +// T3 : value pointer +void TemplateTable::jvmti_post_fast_field_mod() { + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. 
+ Label L2; + //kill AT, T1, T2, T3, T9 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T9; + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ lw(tmp3, AT, 0); + __ beq(tmp3, R0, L2); + __ delayed()->nop(); + __ pop_ptr(tmp1); + __ verify_oop(tmp1); + __ push_ptr(tmp1); + switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ push_i(FSR); break; + case Bytecodes::_fast_dputfield: __ push_d(FSF); break; + case Bytecodes::_fast_fputfield: __ push_f(); break; + case Bytecodes::_fast_lputfield: __ push_l(FSR); break; + default: ShouldNotReachHere(); + } + __ move(tmp3, SP); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); + __ verify_oop(tmp1); + // tmp1: object pointer copied above + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + + switch (bytecode()) { // restore tos values + case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; + case Bytecodes::_fast_dputfield: __ pop_d(); break; + case Bytecodes::_fast_fputfield: __ pop_f(); break; + case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; + } + __ bind(L2); + } +} + +// used registers : T2, T3, T1 +// T2 : index & off & field address +// T3 : cache & obj +// T1 : flags +void TemplateTable::fast_storefield(TosState state) { + transition(state, vtos); + + const Register scratch = T8; + + ByteSize base = ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ sync(); + + // test for volatile with T1 + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); + + // replace index with field offset from cache entry + __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); + + Label Done; + { + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, T1); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + // Get object from stack + pop_and_check_object(T3); + + if (bytecode() != Bytecodes::_fast_aputfield) { + // field address + __ daddu(T2, T3, T2); + } + + // access field + switch (bytecode()) { + case Bytecodes::_fast_zputfield: + __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 + // fall through to bputfield + case Bytecodes::_fast_bputfield: + __ sb(FSR, T2, 0); + break; + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: + __ sh(FSR, T2, 0); + break; + case Bytecodes::_fast_iputfield: + __ sw(FSR, T2, 0); + break; + case Bytecodes::_fast_lputfield: + __ sd(FSR, T2, 0 * wordSize); + break; + case Bytecodes::_fast_fputfield: + __ 
swc1(FSF, T2, 0); + break; + case Bytecodes::_fast_dputfield: + __ sdc1(FSF, T2, 0 * wordSize); + break; + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR, _bs->kind(), false); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + +// used registers : T2, T3, T1 +// T3 : cp_entry & cache +// T2 : index & offset +void TemplateTable::fast_accessfield(TosState state) { + transition(atos, state); + + const Register scratch = T8; + + // do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we take + // the time to call into the VM. + Label L1; + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ lw(T3, AT, 0); + __ beq(T3, R0, L1); + __ delayed()->nop(); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(T3, T1, 1); + __ move(TSR, FSR); + __ verify_oop(FSR); + // FSR: object pointer copied above + // T3: cache entry pointer + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), + FSR, T3); + __ move(FSR, TSR); + __ bind(L1); + } + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ sync(); + + // replace index with field offset from cache entry + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + { + __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + // FSR: object + __ verify_oop(FSR); + __ null_check(FSR); + // field addresses + __ daddu(FSR, FSR, T2); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_bgetfield: + __ lb(FSR, FSR, 0); + break; + case Bytecodes::_fast_sgetfield: + __ lh(FSR, FSR, 0); + break; + case Bytecodes::_fast_cgetfield: + __ lhu(FSR, FSR, 0); + break; + case Bytecodes::_fast_igetfield: + __ lw(FSR, FSR, 0); + break; + case Bytecodes::_fast_lgetfield: + __ stop("should not be rewritten"); + break; + case Bytecodes::_fast_fgetfield: + __ lwc1(FSF, FSR, 0); + break; + case Bytecodes::_fast_dgetfield: + __ ldc1(FSF, FSR, 0); + break; + case Bytecodes::_fast_agetfield: + //add for compressedoops + __ load_heap_oop(FSR, Address(FSR, 0)); + __ verify_oop(FSR); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + +// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 +// used registers : T1, T2, T3, T1 +// T1 : obj & field address +// T2 : off +// T3 : cache +// T1 : index +void TemplateTable::fast_xaccess(TosState state) { + transition(vtos, state); + + const Register scratch = T8; + + // get receiver + __ ld(T1, aaddress(0)); + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 2); + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + 
ConstantPoolCacheEntry::f2_offset())); + + { + __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ daddiu(BCP, BCP, 1); + __ null_check(T1); + __ daddu(T1, T1, T2); + + if (state == itos) { + __ lw(FSR, T1, 0); + } else if (state == atos) { + __ load_heap_oop(FSR, Address(T1, 0)); + __ verify_oop(FSR); + } else if (state == ftos) { + __ lwc1(FSF, T1, 0); + } else { + ShouldNotReachHere(); + } + __ daddiu(BCP, BCP, -1); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + + + +//----------------------------------------------------------------------------- +// Calls + +void TemplateTable::count_calls(Register method, Register temp) { + // implemented elsewhere + ShouldNotReachHere(); +} + +// method, index, recv, flags: T1, T2, T3, T1 +// byte_no = 2 for _invokevirtual, 1 else +// T0 : return address +// get the method & index of the invoke, and push the return address of +// the invoke(first word in the frame) +// this address is where the return code jmp to. +// NOTE : this method will set T3&T1 as recv&flags +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. + Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) { + // determine flags + const Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + const bool save_flags = (flags != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); + assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); + assert(flags == noreg || flags == T1, "error flags reg."); + assert(recv == noreg || recv == T3, "error recv reg."); + + // setup registers & access constant pool cache + if(recv == noreg) recv = T3; + if(flags == noreg) flags = T1; + assert_different_registers(method, index, recv, flags); + + // save 'interpreter return address' + __ save_bcp(); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); + __ andr(AT, AT, flags); + __ beq(AT, R0, L_no_push); + __ delayed()->nop(); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. + Register tmp = SSR; + __ push(tmp); + __ move(tmp, index); + assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, tmp); + __ pop(tmp); + __ push(index); // push appendix (MethodType, CallSite, etc.) 
+ __ bind(L_no_push); + } + + // load receiver if needed (after appendix is pushed so parameter size is correct) + // Note: no return address pushed yet + if (load_receiver) { + __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); + __ andr(recv, flags, AT); + // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. + const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address + const int receiver_is_at_end = -1; // back off one slot to get receiver + Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); + __ ld(recv, recv_addr); + __ verify_oop(recv); + } + if(save_flags) { + __ move(BCP, flags); + } + + // compute return type + __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, 0xf); + + // Make sure we don't need to mask flags for tos_state_shift after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + // load return address + { + const address table = (address) Interpreter::invoke_return_entry_table_for(code); + __ li(AT, (long)table); + __ dsll(flags, flags, LogBytesPerWord); + __ daddu(AT, AT, flags); + __ ld(RA, AT, 0); + } + + if (save_flags) { + __ move(flags, BCP); + __ restore_bcp(); + } +} + +// used registers : T0, T3, T1, T2 +// T3 : recv, this two register using convention is by prepare_invoke +// T1 : flags, klass +// Rmethod : method, index must be Rmethod +void TemplateTable::invokevirtual_helper(Register index, + Register recv, + Register flags) { + + assert_different_registers(index, recv, flags, T2); + + // Test for an invoke of a final method + Label notFinal; + __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); + __ andr(AT, flags, AT); + __ beq(AT, R0, notFinal); + __ delayed()->nop(); + + Register method = index; // method must be Rmethod + assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // the index is indeed methodOop, for this is vfinal, + // see ConstantPoolCacheEntry::set_method for more info + + __ verify_oop(method); + + // It's final, need a null check here! 
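+  // On the non-final path below the receiver is null-checked implicitly by the
+  // klass load (null_check(recv, oopDesc::klass_offset_in_bytes()) followed by
+  // load_klass); the final-method path skips that load, so the check is done
+  // explicitly here.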
+ __ null_check(recv); + + // profile this call + __ profile_final_call(T2); + + // T2: tmp, used for mdp + // method: callee + // T9: tmp + // is_virtual: true + __ profile_arguments_type(T2, method, T9, true); + + __ jump_from_interpreted(method, T2); + + __ bind(notFinal); + + // get receiver klass + __ null_check(recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(T2, recv); + __ verify_oop(T2); + + // profile this call + __ profile_virtual_call(T2, T0, T1); + + // get target methodOop & entry point + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + __ dsll(AT, index, Address::times_ptr); + // T2: receiver + __ daddu(AT, T2, AT); + //this is a ualign read + __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); + __ profile_arguments_type(T2, method, T9, true); + __ jump_from_interpreted(method, T2); + +} + +void TemplateTable::invokevirtual(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); + // now recv & flags in T3, T1 + invokevirtual_helper(Rmethod, T3, T1); +} + +// T9 : entry +// Rmethod : method +void TemplateTable::invokespecial(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3); + // now recv & flags in T3, T1 + __ verify_oop(T3); + __ null_check(T3); + __ profile_call(T9); + + // T8: tmp, used for mdp + // Rmethod: callee + // T9: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T9, false); + + __ jump_from_interpreted(Rmethod, T9); + __ move(T0, T3); +} + +void TemplateTable::invokestatic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG); + __ verify_oop(Rmethod); + + __ profile_call(T9); + + // T8: tmp, used for mdp + // Rmethod: callee + // T9: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T9, false); + + __ jump_from_interpreted(Rmethod, T9); +} + +// i have no idea what to do here, now. for future change. FIXME. +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + __ stop("fast_invokevfinal not used on mips64"); +} + +// used registers : T0, T1, T2, T3, T1, A7 +// T0 : itable, vtable, entry +// T1 : interface +// T3 : receiver +// T1 : flags, klass +// Rmethod : index, method, this is required by interpreter_entry +void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); + //this method will use T1-T4 and T0 + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, T2, Rmethod, T3, T1); + // T2: reference klass + // Rmethod: method + // T3: receiver + // T1: flags + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCacheOop.cpp for details. + // This code isn't produced by javac, but could be produced by + // another compliant java compiler. + Label notMethod; + __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); + __ andr(AT, T1, AT); + __ beq(AT, R0, notMethod); + __ delayed()->nop(); + + invokevirtual_helper(Rmethod, T3, T1); + __ bind(notMethod); + // Get receiver klass into T1 - also a null check + //add for compressedoops + __ load_klass(T1, T3); + __ verify_oop(T1); + + Label no_such_interface, no_such_method; + + // Receiver subtype check against REFC. 
+ // Superklass in T2. Subklass in T1. + __ lookup_interface_method(// inputs: rec. class, interface, itable index + T1, T2, noreg, + // outputs: scan temp. reg, scan temp. reg + T0, FSR, + no_such_interface, + /*return_method=*/false); + + + // profile this call + __ profile_virtual_call(T1, T0, FSR); + + // Get declaring interface class from method, and itable index + __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); + __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); + __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); + __ subu32(Rmethod, R0, Rmethod); + + __ lookup_interface_method(// inputs: rec. class, interface, itable index + T1, T2, Rmethod, + // outputs: method, scan temp. reg + Rmethod, T0, + no_such_interface); + + // Rmethod: Method* to call + // T3: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ beq(Rmethod, R0, no_such_method); + __ delayed()->nop(); + + __ profile_arguments_type(T1, Rmethod, T0, true); + + // do the call + // T3: receiver + // Rmethod: Method* + __ jump_from_interpreted(Rmethod, T1); + __ should_not_reach_here(); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + __ bind(no_such_interface); + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + +} + + +void TemplateTable::invokehandle(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + const Register T2_method = Rmethod; + const Register FSR_mtype = FSR; + const Register T3_recv = T3; + + if (!EnableInvokeDynamic) { + // rewriter does not generate this bytecode + __ should_not_reach_here(); + return; + } + + prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); + //??__ verify_method_ptr(T2_method); + __ verify_oop(T3_recv); + __ null_check(T3_recv); + + // T9: MethodType object (from cpool->resolved_references[f1], if necessary) + // T2_method: MH.invokeExact_MT method (from f2) + + // Note: T9 is already pushed (if necessary) by prepare_invoke + + // FIXME: profile the LambdaForm also + __ profile_final_call(T9); + + // T8: tmp, used for mdp + // T2_method: callee + // T9: tmp + // is_virtual: true + __ profile_arguments_type(T8, T2_method, T9, true); + + __ jump_from_interpreted(T2_method, T9); +} + + void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + if (!EnableInvokeDynamic) { + // We should not encounter this bytecode if !EnableInvokeDynamic. + // The verifier will stop it. However, if we get past the verifier, + // this will stop the thread in a reasonable way, without crashing the JVM. 
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + return; + } + + //const Register Rmethod = T2; + const Register T2_callsite = T2; + + prepare_invoke(byte_no, Rmethod, T2_callsite); + + // T2: CallSite object (from cpool->resolved_references[f1]) + // Rmethod: MH.linkToCallSite method (from f2) + + // Note: T2_callsite is already pushed by prepare_invoke + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(T9); + + // T8: tmp, used for mdp + // Rmethod: callee + // T9: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T9, false); + + __ verify_oop(T2_callsite); + + __ jump_from_interpreted(Rmethod, T9); + } + +//----------------------------------------------------------------------------- +// Allocation +// T1 : tags & buffer end & thread +// T2 : object end +// T3 : klass +// T1 : object size +// A1 : cpool +// A2 : cp index +// return object in FSR +void TemplateTable::_new() { + transition(vtos, atos); + __ get_unsigned_2_byte_index_at_bcp(A2, 1); + + Label slow_case; + Label done; + Label initialize_header; + Label initialize_object; // including clearing the fields + Label allocate_shared; + + // get InstanceKlass in T3 + __ get_cpool_and_tags(A1, T1); + + __ dsll(AT, A2, Address::times_8); + if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { + __ gsldx(T3, A1, AT, sizeof(ConstantPool)); + } else { + __ daddu(AT, A1, AT); + __ ld(T3, AT, sizeof(ConstantPool)); + } + + // make sure the class we're about to instantiate has been resolved. + // Note: slow_case does a pop of stack, which is why we loaded class/pushed above + const int tags_offset = Array::base_offset_in_bytes(); + if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { + __ gslbx(AT, T1, A2, tags_offset); + } else { + __ daddu(T1, T1, A2); + __ lb(AT, T1, tags_offset); + } + if(os::is_MP()) { + __ sync(); // load acquire + } + __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + + // make sure klass is initialized & doesn't have finalizer + // make sure klass is fully initialized + __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); + __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + // has_finalizer + __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); + __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + // Allocate the instance + // 1) Try to allocate in the TLAB + // 2) if fail and the object is large allocate in the shared Eden + // 3) if the above fails (or is not applicable), go to a slow case + // (creates a new TLAB, etc.) + + const bool allow_shared_alloc = + Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode; + +#ifndef OPT_THREAD + const Register thread = T8; + if (UseTLAB || allow_shared_alloc) { + __ get_thread(thread); + } +#else + const Register thread = TREG; +#endif + + if (UseTLAB) { + // get tlab_top + __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); + // get tlab_end + __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); + __ daddu(T2, FSR, T0); + __ slt(AT, AT, T2); + __ bne(AT, R0, allow_shared_alloc ? 
allocate_shared : slow_case); + __ delayed()->nop(); + __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); + + if (ZeroTLAB) { + // the fields have been already cleared + __ beq(R0, R0, initialize_header); + } else { + // initialize both the header and fields + __ beq(R0, R0, initialize_object); + } + __ delayed()->nop(); + } + + // Allocation in the shared Eden , if allowed + // T0 : instance size in words + if(allow_shared_alloc){ + __ bind(allocate_shared); + + Label retry; + Address heap_top(T1); + __ set64(T1, (long)Universe::heap()->top_addr()); + __ ld(FSR, heap_top); + + __ bind(retry); + __ set64(AT, (long)Universe::heap()->end_addr()); + __ ld(AT, AT, 0); + __ daddu(T2, FSR, T0); + __ slt(AT, AT, T2); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + // Compare FSR with the top addr, and if still equal, store the new + // top addr in T2 at the address of the top addr pointer. Sets AT if was + // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. + // + // FSR: object begin + // T2: object end + // T0: instance size in words + + // if someone beat us on the allocation, try again, otherwise continue + __ cmpxchg(T2, heap_top, FSR); + __ beq(AT, R0, retry); + __ delayed()->nop(); + + __ incr_allocated_bytes(thread, T0, 0); + } + + if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. + __ bind(initialize_object); + __ set64(AT, - sizeof(oopDesc)); + __ daddu(T0, T0, AT); + __ beq(T0, R0, initialize_header); + __ delayed()->nop(); + + // initialize remaining object fields: T0 is a multiple of 2 + { + Label loop; + __ daddu(T1, FSR, T0); + __ daddiu(T1, T1, -oopSize); + + __ bind(loop); + __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); + __ bne(T1, FSR, loop); //dont clear header + __ delayed()->daddiu(T1, T1, -oopSize); + } + + //klass in T3, + // initialize object header only. 
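+  // Illustration (assuming compressed class pointers) of the header written here:
+  //
+  //   [ mark word: prototype, possibly biased ][ narrow klass ][ klass gap = 0 ]
+  //
+  // store_klass_gap() zeroes the 32-bit gap next to the narrow klass field and
+  // store_klass() fills in the klass; with uncompressed class pointers the klass
+  // occupies the full word and there is no gap.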
+ __ bind(initialize_header); + if (UseBiasedLocking) { + __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); + __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); + } else { + __ set64(AT, (long)markOopDesc::prototype()); + __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); + } + + __ store_klass_gap(FSR, R0); + __ store_klass(FSR, T3); + + { + SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); + // Trigger dtrace event for fastpath + __ push(atos); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); + __ pop(atos); + + } + __ b(done); + __ delayed()->nop(); + } + + // slow case + __ bind(slow_case); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); + + // continue + __ bind(done); + __ sync(); +} + +void TemplateTable::newarray() { + transition(itos, atos); + __ lbu(A1, at_bcp(1)); + //type, count + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); + __ sync(); +} + +void TemplateTable::anewarray() { + transition(itos, atos); + __ get_2_byte_integer_at_bcp(A2, AT, 1); + __ huswap(A2); + __ get_constant_pool(A1); + // cp, index, count + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); + __ sync(); +} + +void TemplateTable::arraylength() { + transition(atos, itos); + __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); + __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); +} + +// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) +// T2 : sub klass +// T3 : cpool +// T3 : super klass +void TemplateTable::checkcast() { + transition(atos, atos); + Label done, is_null, ok_is_subtype, quicked, resolved; + __ beq(FSR, R0, is_null); + __ delayed()->nop(); + + // Get cpool & tags index + __ get_cpool_and_tags(T3, T1); + __ get_2_byte_integer_at_bcp(T2, AT, 1); + __ huswap(T2); + + // See if bytecode has already been quicked + __ daddu(AT, T1, T2); + __ lb(AT, AT, Array::base_offset_in_bytes()); + if(os::is_MP()) { + __ sync(); // load acquire + } + __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); + __ beq(AT, R0, quicked); + __ delayed()->nop(); + + // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. + // Then, GC will move the object in V0 to another places in heap. + // Therefore, We should never save such an object in register. + // Instead, we should save it in the stack. It can be modified automatically by the GC thread. + // After GC, the object address in FSR is changed to a new place. + // + __ push(atos); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(T3, thread); + __ pop_ptr(FSR); + __ b(resolved); + __ delayed()->nop(); + + // klass already in cp, get superklass in T3 + __ bind(quicked); + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T3, AT, sizeof(ConstantPool)); + + __ bind(resolved); + + // get subklass in T2 + //add for compressedoops + __ load_klass(T2, FSR); + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); + + // Come here on failure + // object is at FSR + __ jmp(Interpreter::_throw_ClassCastException_entry); + __ delayed()->nop(); + + // Come here on success + __ bind(ok_is_subtype); + + // Collect counts on whether this check-cast sees NULLs a lot or not. 
+ if (ProfileInterpreter) { + __ b(done); + __ delayed()->nop(); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); + } + __ bind(done); +} + +// i use T3 as cpool, T1 as tags, T2 as index +// object always in FSR, superklass in T3, subklass in T2 +void TemplateTable::instanceof() { + transition(atos, itos); + Label done, is_null, ok_is_subtype, quicked, resolved; + + __ beq(FSR, R0, is_null); + __ delayed()->nop(); + + // Get cpool & tags index + __ get_cpool_and_tags(T3, T1); + // get index + __ get_2_byte_integer_at_bcp(T2, AT, 1); + __ huswap(T2); + + // See if bytecode has already been quicked + // quicked + __ daddu(AT, T1, T2); + __ lb(AT, AT, Array::base_offset_in_bytes()); + if(os::is_MP()) { + __ sync(); // load acquire + } + __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); + __ beq(AT, R0, quicked); + __ delayed()->nop(); + + __ push(atos); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(T3, thread); + __ pop_ptr(FSR); + __ b(resolved); + __ delayed()->nop(); + + // get superklass in T3, subklass in T2 + __ bind(quicked); + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T3, AT, sizeof(ConstantPool)); + + __ bind(resolved); + // get subklass in T2 + //add for compressedoops + __ load_klass(T2, FSR); + + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); + // Come here on failure + __ b(done); + __ delayed(); __ move(FSR, R0); + + // Come here on success + __ bind(ok_is_subtype); + __ move(FSR, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ beq(R0, R0, done); + __ delayed()->nop(); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); + // FSR = 0: obj == NULL or obj is not an instanceof the specified klass + // FSR = 1: obj != NULL and obj is an instanceof the specified klass +} + +//-------------------------------------------------------- +//-------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + // Note: We get here even if we are single stepping.. + // jbug inists on setting breakpoints at every bytecode + // even if we are in single step mode. + + transition(vtos, vtos); + + // get the unpatched byte code + __ get_method(A1); + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::get_original_bytecode_at), + A1, BCP); + __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal + + // post the breakpoint event + __ get_method(A1); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); + + // complete the execution of original bytecode + __ dispatch_only_normal(vtos); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + __ null_check(FSR); + __ jmp(Interpreter::throw_exception_entry()); + __ delayed()->nop(); +} + +//----------------------------------------------------------------------------- +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// [expressions ] <--- SP = expression stack top +// .. +// [expressions ] +// [monitor entry] <--- monitor block top = expression stack bot +// .. 
+// [monitor entry] +// [frame data ] <--- monitor block bot +// ... +// [return addr ] <--- FP + +// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer +// object always in FSR +void TemplateTable::monitorenter() { + transition(atos, vtos); + + // check for NULL object + __ null_check(FSR); + + const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset + * wordSize); + const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); + Label allocated; + + // initialize entry pointer + __ move(c_rarg0, R0); + + // find a free slot in the monitor block (result in c_rarg0) + { + Label entry, loop, exit, next; + __ ld(T2, monitor_block_top); + __ b(entry); + __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + + // free slot? + __ bind(loop); + __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); + __ bne(AT, R0, next); + __ delayed()->nop(); + __ move(c_rarg0, T2); + + __ bind(next); + __ beq(FSR, AT, exit); + __ delayed()->nop(); + __ daddiu(T2, T2, entry_size); + + __ bind(entry); + __ bne(T3, T2, loop); + __ delayed()->nop(); + __ bind(exit); + } + + __ bne(c_rarg0, R0, allocated); + __ delayed()->nop(); + + // allocate one if there's no free slot + { + Label entry, loop; + // 1. compute new pointers // SP: old expression stack top + __ ld(c_rarg0, monitor_block_top); + __ daddiu(SP, SP, - entry_size); + __ daddiu(c_rarg0, c_rarg0, - entry_size); + __ sd(c_rarg0, monitor_block_top); + __ b(entry); + __ delayed(); __ move(T3, SP); + + // 2. move expression stack contents + __ bind(loop); + __ ld(AT, T3, entry_size); + __ sd(AT, T3, 0); + __ daddiu(T3, T3, wordSize); + __ bind(entry); + __ bne(T3, c_rarg0, loop); + __ delayed()->nop(); + } + + __ bind(allocated); + // Increment bcp to point to the next bytecode, + // so exception handling for async. exceptions work correctly. + // The object has already been poped from the stack, so the + // expression stack looks correct. + __ daddiu(BCP, BCP, 1); + __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ lock_object(c_rarg0); + // check to make sure this monitor doesn't cause stack overflow after locking + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + // The bcp has already been incremented. Just need to dispatch to next instruction. + + __ dispatch_next(vtos); +} + +// T2 : top +// c_rarg0 : entry +void TemplateTable::monitorexit() { + transition(atos, vtos); + + __ null_check(FSR); + + const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); + Label found; + + // find matching slot + { + Label entry, loop; + __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ b(entry); + __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + + __ bind(loop); + __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ beq(FSR, AT, found); + __ delayed()->nop(); + __ daddiu(c_rarg0, c_rarg0, entry_size); + __ bind(entry); + __ bne(T2, c_rarg0, loop); + __ delayed()->nop(); + } + + // error handling. 
Unlocking was not block-structured + Label end; + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + // call run-time routine + // c_rarg0: points to monitor entry + __ bind(found); + __ move(TSR, FSR); + __ unlock_object(c_rarg0); + __ move(FSR, TSR); + __ bind(end); +} + + +// Wide instructions +void TemplateTable::wide() { + transition(vtos, vtos); + __ lbu(Rnext, at_bcp(1)); + __ dsll(T9, Rnext, Address::times_8); + __ li(AT, (long)Interpreter::_wentry_point); + __ daddu(AT, T9, AT); + __ ld(T9, AT, 0); + __ jr(T9); + __ delayed()->nop(); +} + + +void TemplateTable::multianewarray() { + transition(vtos, atos); + // last dim is on top of stack; we want address of first one: + // first_addr = last_addr + (ndims - 1) * wordSize + __ lbu(A1, at_bcp(3)); // dimension + __ daddiu(A1, A1, -1); + __ dsll(A1, A1, Address::times_8); + __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); + __ lbu(AT, at_bcp(3)); + __ dsll(AT, AT, Address::times_8); + __ daddu(SP, SP, AT); + __ sync(); +} +#endif // !CC_INTERP diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp b/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp new file mode 100644 index 00000000000..b63274a2064 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP + + static void prepare_invoke(int byte_no, + Register method, + Register index = noreg, + Register recv = noreg, + Register flags = noreg + ); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + //static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); + static void volatile_barrier(); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + +#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP diff --git a/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp b/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp new file mode 100644 index 00000000000..6939914356d --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* JavaCallWrapper */ \ + /******************************/ \ + /******************************/ \ + /* JavaFrameAnchor */ \ + /******************************/ \ + volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ + \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ + /* be present there) */ + + +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ + /* be present there) */ + + +#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ + /* be present there) */ + +#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ + /* be present there) */ + +#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp new file mode 100644 index 00000000000..a98f70d9ff1 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "memory/allocation.inline.hpp" +#include "vm_version_ext_mips.hpp" + +// VM_Version_Ext statics +int VM_Version_Ext::_no_of_threads = 0; +int VM_Version_Ext::_no_of_cores = 0; +int VM_Version_Ext::_no_of_sockets = 0; +bool VM_Version_Ext::_initialized = false; +char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; +char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; + +void VM_Version_Ext::initialize_cpu_information(void) { + // do nothing if cpu info has been initialized + if (_initialized) { + return; + } + + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; + if (is_loongson()) { + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); + } else { + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); + } + _initialized = true; +} + +int VM_Version_Ext::number_of_threads(void) { + initialize_cpu_information(); + return _no_of_threads; +} + +int VM_Version_Ext::number_of_cores(void) { + initialize_cpu_information(); + return _no_of_cores; +} + +int VM_Version_Ext::number_of_sockets(void) { + initialize_cpu_information(); + return _no_of_sockets; +} + +const char* VM_Version_Ext::cpu_name(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); + return tmp; +} + +const char* VM_Version_Ext::cpu_description(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); + return tmp; +} diff --git a/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp new file mode 100644 index 00000000000..a240fcc2e92 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP + +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" + +class VM_Version_Ext : public VM_Version { + private: + static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; + static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + + static int _no_of_threads; + static int _no_of_cores; + static int _no_of_sockets; + static bool _initialized; + static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; + static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + + public: + static int number_of_threads(void); + static int number_of_cores(void); + static int number_of_sockets(void); + + static const char* cpu_name(void); + static const char* cpu_description(void); + static void initialize_cpu_information(void); +}; + +#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/vm_version_mips.cpp b/hotspot/src/cpu/mips/vm/vm_version_mips.cpp new file mode 100644 index 00000000000..aef8f0746ae --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vm_version_mips.cpp @@ -0,0 +1,510 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/java.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "vm_version_mips.hpp" +#ifdef TARGET_OS_FAMILY_linux +# include "os_linux.inline.hpp" +#endif + +#define A0 RA0 + +int VM_Version::_cpuFeatures; +const char* VM_Version::_features_str = ""; +VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; +volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; +bool VM_Version::_is_cpucfg_instruction_supported = true; +bool VM_Version::_cpu_info_is_initialized = false; + +static BufferBlob* stub_blob; +static const int stub_size = 600; + +extern "C" { + typedef void (*get_cpu_info_stub_t)(void*); +} +static get_cpu_info_stub_t get_cpu_info_stub = NULL; + + +class VM_Version_StubGenerator: public StubCodeGenerator { + public: + + VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} + + address generate_get_cpu_info() { + assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); + StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); +# define __ _masm-> + + address start = __ pc(); + + __ enter(); + __ push(AT); + __ push(V0); + + __ li(AT, (long)0); + __ cpucfg(V0, AT); + __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); + __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); + + __ li(AT, 1); + __ cpucfg(V0, AT); + __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); + __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); + + __ li(AT, 2); + __ cpucfg(V0, AT); + __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); + __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); + + __ pop(V0); + __ pop(AT); + __ leave(); + __ jr(RA); + __ delayed()->nop(); +# undef __ + + return start; + }; +}; + +uint32_t VM_Version::get_feature_flags_by_cpucfg() { + uint32_t result = 0; + if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) + result |= CPU_MMI; + if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) + result |= CPU_MSA1_0; + if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) + result |= CPU_MSA2_0; + if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) + result |= CPU_CGP; + if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) + result |= CPU_LSX1; + if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) + result |= CPU_LSX2; + if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) + result |= CPU_LASX; + if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) + result |= CPU_LLSYNC; + if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) + result |= CPU_TGTSYNC; + if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) + result |= CPU_MUALP; + if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) + result |= CPU_LEXT1; + if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) + result |= CPU_LEXT2; + if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) + result |= CPU_LEXT3; + if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) + result |= CPU_LAMO; + if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) + result |= CPU_LPIXU; + + result |= CPU_ULSYNC; + + return result; +} + +void read_cpu_info(const char *path, char *result) { + FILE *ptr; + char buf[1024]; + int i = 0; + if((ptr=fopen(path, "r")) != NULL) { + while(fgets(buf, 1024, ptr)!=NULL) { + strcat(result,buf); + i++; + if (i == 10) break; + } + fclose(ptr); + } else { + warning("Can't detect CPU info - cannot open %s", path); + } +} + +void strlwr(char *str) { + for (; *str!='\0'; str++) + *str 
= tolower(*str); +} + +int VM_Version::get_feature_flags_by_cpuinfo(int features) { + assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); + + char res[10240]; + int i; + memset(res, '\0', 10240 * sizeof(char)); + read_cpu_info("/proc/cpuinfo", res); + // res is converted to lower case + strlwr(res); + + if (strstr(res, "loongson")) { + // Loongson CPU + features |= CPU_LOONGSON; + + const struct Loongson_Cpuinfo loongson_cpuinfo[] = { + {L_3A1000, "3a1000"}, + {L_3B1500, "3b1500"}, + {L_3A2000, "3a2000"}, + {L_3B2000, "3b2000"}, + {L_3A3000, "3a3000"}, + {L_3B3000, "3b3000"}, + {L_2K1000, "2k1000"}, + {L_UNKNOWN, "unknown"} + }; + + // Loongson Family + int detected = 0; + for (i = 0; i <= L_UNKNOWN; i++) { + switch (i) { + // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed + // test PRID REV in /proc/cpuinfo + // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 + // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 + case L_3A1000: + if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { + features |= CPU_LOONGSON_GS464; + detected++; + //tty->print_cr("3A1000 platform"); + } + break; + case L_3B1500: + if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { + features |= CPU_LOONGSON_GS464; + detected++; + //tty->print_cr("3B1500 platform"); + } + break; + case L_3A2000: + case L_3B2000: + case L_3A3000: + case L_3B3000: + if (strstr(res, loongson_cpuinfo[i].match_str)) { + features |= CPU_LOONGSON_GS464E; + detected++; + //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); + } + break; + case L_2K1000: + if (strstr(res, loongson_cpuinfo[i].match_str)) { + features |= CPU_LOONGSON_GS264; + detected++; + //tty->print_cr("2K1000 platform"); + } + break; + case L_UNKNOWN: + if (detected == 0) { + detected++; + //tty->print_cr("unknown Loongson platform"); + } + break; + default: + ShouldNotReachHere(); + } + } + assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); + } else { // not Loongson + // Not Loongson CPU + //tty->print_cr("MIPS platform"); + } + + if (features & CPU_LOONGSON_GS264) { + features |= CPU_LEXT1; + features |= CPU_LEXT2; + features |= CPU_TGTSYNC; + features |= CPU_ULSYNC; + features |= CPU_MSA1_0; + features |= CPU_LSX1; + } else if (features & CPU_LOONGSON_GS464) { + features |= CPU_LEXT1; + features |= CPU_LLSYNC; + features |= CPU_TGTSYNC; + } else if (features & CPU_LOONGSON_GS464E) { + features |= CPU_LEXT1; + features |= CPU_LEXT2; + features |= CPU_LEXT3; + features |= CPU_TGTSYNC; + features |= CPU_ULSYNC; + } else if (features & CPU_LOONGSON) { + // unknow loongson + features |= CPU_LLSYNC; + features |= CPU_TGTSYNC; + features |= CPU_ULSYNC; + } + VM_Version::_cpu_info_is_initialized = true; + + return features; +} + +void VM_Version::get_processor_features() { + + clean_cpuFeatures(); + + // test if cpucfg instruction is supported + VM_Version::_is_determine_cpucfg_supported_running = true; + __asm__ __volatile__( + ".insn \n\t" + ".word (0xc8080118)\n\t" // cpucfg zero, zero + : + : + : + ); + VM_Version::_is_determine_cpucfg_supported_running = false; + + if (supports_cpucfg()) { + get_cpu_info_stub(&_cpuid_info); + _cpuFeatures = get_feature_flags_by_cpucfg(); + // Only Loongson CPUs support cpucfg + _cpuFeatures |= CPU_LOONGSON; + } else { + _cpuFeatures = get_feature_flags_by_cpuinfo(0); + } + + _supports_cx8 = true; + + if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { + FLAG_SET_CMDLINE(uintx, 
MaxGCPauseMillis, 650); + } + +#ifdef COMPILER2 + if (MaxVectorSize > 0) { + if (!is_power_of_2(MaxVectorSize)) { + warning("MaxVectorSize must be a power of 2"); + MaxVectorSize = 8; + } + if (MaxVectorSize > 0 && supports_ps()) { + MaxVectorSize = 8; + } else { + MaxVectorSize = 0; + } + } + // + // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. + // Vector optimization was closed by default. + // The reasons: + // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. + // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. + // + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = 0; + } + +#endif + + if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 1000); + } + } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 2000); + } + } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 3000); + } + } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 4000); + } + } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 10000); + } + } else { + assert(false, "Should Not Reach Here, what is the cpu type?"); + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 10000); + } + } + + if (supports_lext1()) { + if (FLAG_IS_DEFAULT(UseLEXT1)) { + FLAG_SET_DEFAULT(UseLEXT1, true); + } + } else if (UseLEXT1) { + warning("LEXT1 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLEXT1, false); + } + + if (supports_lext2()) { + if (FLAG_IS_DEFAULT(UseLEXT2)) { + FLAG_SET_DEFAULT(UseLEXT2, true); + } + } else if (UseLEXT2) { + warning("LEXT2 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLEXT2, false); + } + + if (supports_lext3()) { + if (FLAG_IS_DEFAULT(UseLEXT3)) { + FLAG_SET_DEFAULT(UseLEXT3, true); + } + } else if (UseLEXT3) { + warning("LEXT3 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLEXT3, false); + } + + if (UseLEXT2) { + if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { + FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); + } + } else if (UseCountTrailingZerosInstructionMIPS64) { + if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) + warning("ctz/dctz instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); + } + + if (TieredCompilation) { + if (!FLAG_IS_DEFAULT(TieredCompilation)) + warning("TieredCompilation not supported"); + FLAG_SET_DEFAULT(TieredCompilation, false); + } + + char buf[256]; + bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); + + // A note on the _features_string format: + // There are jtreg tests checking the _features_string for various properties. + // For some strange reason, these tests require the string to contain + // only _lowercase_ characters. Keep that in mind when being surprised + // about the unusual notation of features - and when adding new ones. + // Features may have one comma at the end. 
+ // Furthermore, use one, and only one, separator space between features. + // Multiple spaces are considered separate tokens, messing up everything. + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", + (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), + (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), + (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), + (is_gs264() ? ", gs264 (2k1000)" : ""), + (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), + (supports_dsp() ? ", dsp" : ""), + (supports_ps() ? ", ps" : ""), + (supports_3d() ? ", 3d" : ""), + (supports_mmi() ? ", mmi" : ""), + (supports_msa1_0() ? ", msa1_0" : ""), + (supports_msa2_0() ? ", msa2_0" : ""), + (supports_lsx1() ? ", lsx1" : ""), + (supports_lsx2() ? ", lsx2" : ""), + (supports_lasx() ? ", lasx" : ""), + (supports_lext1() ? ", lext1" : ""), + (supports_lext2() ? ", lext2" : ""), + (supports_lext3() ? ", lext3" : ""), + (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), + (supports_lamo() ? ", lamo" : ""), + (supports_lpixu() ? ", lpixu" : ""), + (needs_llsync() ? ", llsync" : ""), + (needs_tgtsync() ? ", tgtsync": ""), + (needs_ulsync() ? ", ulsync": ""), + (supports_mualp() ? ", mualp" : ""), + UseSyncLevel); + _features_str = strdup(buf); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { + FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); + } + + if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { + FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); + } + + if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { + warning("SHA intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + if (UseAES) { + if (!FLAG_IS_DEFAULT(UseAES)) { + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + } + + if (UseCRC32Intrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + warning("CRC32Intrinsics instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + } + } + + if (UseAESIntrinsics) { + if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } + + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } + + if (CriticalJNINatives) { + if (FLAG_IS_CMDLINE(CriticalJNINatives)) { + warning("CriticalJNINatives specified, but not supported in this VM"); + } + FLAG_SET_DEFAULT(CriticalJNINatives, false); + } +} + +void VM_Version::initialize() { + ResourceMark rm; + // Making this stub must be FIRST use of assembler + + stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); + if (stub_blob == NULL) { + vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); + } + CodeBuffer c(stub_blob); + VM_Version_StubGenerator g(&c); + 
get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, + g.generate_get_cpu_info()); + + get_processor_features(); +} diff --git a/hotspot/src/cpu/mips/vm/vm_version_mips.hpp b/hotspot/src/cpu/mips/vm/vm_version_mips.hpp new file mode 100644 index 00000000000..0de01e5f64c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vm_version_mips.hpp @@ -0,0 +1,221 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP +#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP + +#include "runtime/globals_extension.hpp" +#include "runtime/vm_version.hpp" + + +class VM_Version: public Abstract_VM_Version { +public: + + union Loongson_Cpucfg_Id1 { + uint32_t value; + struct { + uint32_t FP : 1, + FPREV : 3, + MMI : 1, + MSA1 : 1, + MSA2 : 1, + CGP : 1, + WRP : 1, + LSX1 : 1, + LSX2 : 1, + LASX : 1, + R6FXP : 1, + R6CRCP : 1, + R6FPP : 1, + CNT64 : 1, + LSLDR0 : 1, + LSPREF : 1, + LSPREFX : 1, + LSSYNCI : 1, + LSUCA : 1, + LLSYNC : 1, + TGTSYNC : 1, + LLEXC : 1, + SCRAND : 1, + MUALP : 1, + KMUALEn : 1, + ITLBT : 1, + LSUPERF : 1, + SFBP : 1, + CDMAP : 1, + : 1; + } bits; + }; + + union Loongson_Cpucfg_Id2 { + uint32_t value; + struct { + uint32_t LEXT1 : 1, + LEXT2 : 1, + LEXT3 : 1, + LSPW : 1, + LBT1 : 1, + LBT2 : 1, + LBT3 : 1, + LBTMMU : 1, + LPMP : 1, + LPMRev : 3, + LAMO : 1, + LPIXU : 1, + LPIXNU : 1, + LVZP : 1, + LVZRev : 3, + LGFTP : 1, + LGFTRev : 3, + LLFTP : 1, + LLFTRev : 3, + LCSRP : 1, + DISBLKLY : 1, + : 3; + } bits; + }; + +protected: + + enum { + CPU_LOONGSON = (1 << 1), + CPU_LOONGSON_GS464 = (1 << 2), + CPU_LOONGSON_GS464E = (1 << 3), + CPU_LOONGSON_GS264 = (1 << 4), + CPU_MMI = (1 << 11), + CPU_MSA1_0 = (1 << 12), + CPU_MSA2_0 = (1 << 13), + CPU_CGP = (1 << 14), + CPU_LSX1 = (1 << 15), + CPU_LSX2 = (1 << 16), + CPU_LASX = (1 << 17), + CPU_LEXT1 = (1 << 18), + CPU_LEXT2 = (1 << 19), + CPU_LEXT3 = (1 << 20), + CPU_LAMO = (1 << 21), + CPU_LPIXU = (1 << 22), + CPU_LLSYNC = (1 << 23), + CPU_TGTSYNC = (1 << 24), + CPU_ULSYNC = (1 << 25), + CPU_MUALP = (1 << 26), + + //////////////////////add some other feature here////////////////// + } cpuFeatureFlags; + + enum Loongson_Family { + L_3A1000 = 0, + L_3B1500 = 1, + L_3A2000 = 2, + L_3B2000 = 3, + L_3A3000 = 4, + L_3B3000 = 5, + L_2K1000 = 6, + L_UNKNOWN = 7 + }; + + struct Loongson_Cpuinfo { + Loongson_Family id; + const char* const match_str; + }; + + static int _cpuFeatures; + static const char* _features_str; + static volatile bool 
_is_determine_cpucfg_supported_running; + static bool _is_cpucfg_instruction_supported; + static bool _cpu_info_is_initialized; + + struct CpuidInfo { + uint32_t cpucfg_info_id0; + Loongson_Cpucfg_Id1 cpucfg_info_id1; + Loongson_Cpucfg_Id2 cpucfg_info_id2; + uint32_t cpucfg_info_id3; + uint32_t cpucfg_info_id4; + uint32_t cpucfg_info_id5; + uint32_t cpucfg_info_id6; + uint32_t cpucfg_info_id8; + }; + + // The actual cpuid info block + static CpuidInfo _cpuid_info; + + static uint32_t get_feature_flags_by_cpucfg(); + static int get_feature_flags_by_cpuinfo(int features); + static void get_processor_features(); + +public: + // Offsets for cpuid asm stub + static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } + static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } + static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } + static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } + static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } + static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } + static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } + static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } + + static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } + + static void clean_cpuFeatures() { _cpuFeatures = 0; } + + // Initialization + static void initialize(); + + static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } + + static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } + static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } + + static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } + static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } + static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } + static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } + static bool supports_dsp() { return 0; /*not supported yet*/} + static bool supports_ps() { return 0; /*not supported yet*/} + static bool supports_3d() { return 0; /*not supported yet*/} + static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } + static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } + static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } + static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } + static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } + static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } + static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } + static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } + static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } + static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } + static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } + static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } + static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } + static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } + static bool needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } + static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } + + //mips has no such instructions, use ll/sc instead + 
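As the comment above says, there is no single compare-and-exchange instruction here; the VM hand-codes it from LL/SC. For reference only (portable C++, not VM code), a standard atomic compare-exchange expresses the same operation and is typically lowered by the compiler to an ll/sc retry loop plus the required sync barriers on MIPS-family targets:

    #include <atomic>
    #include <cstdint>

    // Portable equivalent of the VM's hand-written cmpxchg: on MIPS this
    // generally compiles down to an ll/sc loop.
    bool cas_word(std::atomic<intptr_t>& cell, intptr_t expected, intptr_t desired) {
      return cell.compare_exchange_strong(expected, desired);
    }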
static bool supports_compare_and_exchange() { return false; } + + static const char* cpu_features() { return _features_str; } + +}; + +#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.cpp b/hotspot/src/cpu/mips/vm/vmreg_mips.cpp new file mode 100644 index 00000000000..86bd74d4305 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vmreg_mips.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "code/vmreg.hpp" + + + +void VMRegImpl::set_regName() { + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { + regName[i++] = reg->name(); + regName[i++] = reg->name(); + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + regName[i++] = freg->name(); + regName[i++] = freg->name(); + freg = freg->successor(); + } + + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + regName[i] = "NON-GPR-FPR"; + } +} diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.hpp b/hotspot/src/cpu/mips/vm/vmreg_mips.hpp new file mode 100644 index 00000000000..6a970ea91aa --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vmreg_mips.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP +#define CPU_MIPS_VM_VMREG_MIPS_HPP + +bool is_Register(); +Register as_Register(); + +bool is_FloatRegister(); +FloatRegister as_FloatRegister(); + +#endif // CPU_MIPS_VM_VMREG_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp b/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp new file mode 100644 index 00000000000..77e18ce57d2 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); + return VMRegImpl::as_VMReg(encoding() << 1 ); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); +} + +inline bool VMRegImpl::is_Register() { + return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; +} + +inline bool VMRegImpl::is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + +inline Register VMRegImpl::as_Register() { + + assert( is_Register(), "must be"); + // Yuk + return ::as_Register(value() >> 1); +} + +inline FloatRegister VMRegImpl::as_FloatRegister() { + assert( is_FloatRegister(), "must be" ); + // Yuk + assert( is_even(value()), "must be" ); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); +} + +inline bool VMRegImpl::is_concrete() { + assert(is_reg(), "must be"); + if(is_Register()) return true; + if(is_FloatRegister()) return true; + assert(false, "what register?"); + return false; +} + +#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP diff --git a/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp b/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp new file mode 100644 index 00000000000..7779c58e0a6 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_mips_64.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_mips.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + + +// machine-dependent part of VtableStubs: create VtableStub of correct size and +// initialize its code + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, + oop receiver, + int index); +#endif + +// used by compiler only; reciever in T0. +// used registers : +// Rmethod : receiver klass & method +// NOTE: If this code is used by the C1, the receiver_location is always 0. +// when reach here, receiver in T0, klass in T8 +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + const int gs2_code_length = VtableStub::pd_code_size_limit(true); + VtableStub* s = new(gs2_code_length) VtableStub(true, vtable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), gs2_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + Register t1 = T8, t2 = Rmethod; +#ifndef PRODUCT + if (CountCompiledCalls) { + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + __ lw(t1, AT , 0); + __ addiu(t1, t1, 1); + __ sw(t1, AT,0); + } +#endif + + // get receiver (need to skip return address on top of stack) + //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); + + // get receiver klass + address npe_addr = __ pc(); + //add for compressedoops + __ load_klass(t1, T0); + // compute entry offset (in words) + int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size(); +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // check offset vs vtable length + __ lw(t2, t1, InstanceKlass::vtable_length_offset()*wordSize); + assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); + __ move(AT, vtable_index*vtableEntry::size()); + __ slt(AT, AT, t2); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ move(A2, vtable_index); + __ move(A1, A0); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); + __ bind(L); + } +#endif // PRODUCT + // load methodOop and target address + const Register method = Rmethod; + int offset = entry_offset*wordSize + vtableEntry::method_offset_in_bytes(); + guarantee(Assembler::is_simm16(offset), "not a signed 16-bit int"); + __ ld_ptr(method, t1, offset); + if (DebugVtables) { + Label L; + __ beq(method, R0, 
L); + __ delayed()->nop(); + __ ld(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("Vtable entry is NULL"); + __ bind(L); + } + // T8: receiver klass + // T0: receiver + // Rmethod: methodOop + // T9: entry + address ame_addr = __ pc(); + __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); + __ jr(T9); + __ delayed()->nop(); + masm->flush(); + s->set_exception_points(npe_addr, ame_addr); + return s; +} + + +// used registers : +// T1 T2 +// when reach here, the receiver in T0, klass in T1 +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Note well: pd_code_size_limit is the absolute minimum we can get + // away with. If you add code here, bump the code stub size + // returned by pd_code_size_limit! + const int gs2_code_length = VtableStub::pd_code_size_limit(false); + VtableStub* s = new(gs2_code_length) VtableStub(false, itable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), gs2_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + // we T8,T9 as temparary register, they are free from register allocator + Register t1 = T8, t2 = T2; + // Entry arguments: + // T1: Interface + // T0: Receiver + +#ifndef PRODUCT + if (CountCompiledCalls) { + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + __ lw(T8, AT, 0); + __ addiu(T8, T8,1); + __ sw(T8, AT, 0); + } +#endif /* PRODUCT */ + const Register holder_klass_reg = T1; // declaring interface klass (DECC) + const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) + const Register icholder_reg = T1; + __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); + + // get receiver klass (also an implicit null-check) + address npe_addr = __ pc(); + __ load_klass(t1, T0); + { + // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. 
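Since lookup_interface_method cannot be reused, the stub open-codes the itable walk that follows (it runs the scan twice, once for the resolved klass and once for the declaring interface). A simplified C++ sketch of that lookup; the struct below is an illustration, not the real itableOffsetEntry layout:

    struct Klass;                           // opaque stand-in
    struct ItableOffsetEntry {              // simplified itable offset record
      const Klass* interface_klass;         // a null klass marks the end of the table
      int          offset;                  // byte offset of that interface's method block
    };

    // Returns the method-block offset for 'iface', or -1 if the receiver's class
    // does not implement it (the generated stub handles that failure differently).
    int find_itable_offset(const ItableOffsetEntry* table, const Klass* iface) {
      for (int i = 0; ; i++) {
        if (table[i].interface_klass == nullptr) return -1;
        if (table[i].interface_klass == iface)   return table[i].offset;
      }
    }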
+ const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); + assert(Assembler::is_simm16(base), "change this code"); + __ daddiu(t2, t1, base); + assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); + __ lw(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); + __ dsll(AT, AT, Address::times_8); + __ daddu(t2, t2, AT); + if (HeapWordsPerLong > 1) { + __ round_to(t2, BytesPerLong); + } + + Label hit, entry; + assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); + __ bind(entry); + +#ifdef ASSERT + // Check that the entry is non-null + if (DebugVtables) { + Label L; + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ lw(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("null entry point found in itable's offset table"); + __ bind(L); + } +#endif + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, resolved_klass_reg, entry); + __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); + + } + + // add for compressedoops + __ load_klass(t1, T0); + // compute itable entry offset (in words) + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); + assert(Assembler::is_simm16(base), "change this code"); + __ daddiu(t2, t1, base); + assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); + __ lw(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); + __ dsll(AT, AT, Address::times_8); + __ daddu(t2, t2, AT); + if (HeapWordsPerLong > 1) { + __ round_to(t2, BytesPerLong); + } + + Label hit, entry; + assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); + __ bind(entry); + +#ifdef ASSERT + // Check that the entry is non-null + if (DebugVtables) { + Label L; + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ lw(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("null entry point found in itable's offset table"); + __ bind(L); + } +#endif + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, holder_klass_reg, entry); + __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); + + // We found a hit, move offset into T9 + __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); + + // Compute itableMethodEntry. 
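The address arithmetic computed here, spelled out as plain C++ for clarity (the constants are passed in rather than taken from the real itableMethodEntry, so the names are illustrative only):

    #include <cstdint>

    // Where the Method* slot lives once the itable scan has produced an offset.
    intptr_t method_slot(intptr_t klass_base,        // t1: receiver klass
                         intptr_t itable_offset,     // t2: offset found by the scan above
                         int      itable_index,      // constant for this call site
                         int      entry_size_bytes,  // itableMethodEntry::size() * wordSize
                         int      method_field_off)  // itableMethodEntry::method_offset_in_bytes()
    {
      return klass_base + itable_offset
           + (intptr_t)itable_index * entry_size_bytes
           + method_field_off;
    }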
+ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + + itableMethodEntry::method_offset_in_bytes(); + + // Get methodOop and entrypoint for compiler + const Register method = Rmethod; + __ dsll(AT, t2, Address::times_1); + __ addu(AT, AT, t1); + guarantee(Assembler::is_simm16(method_offset), "not a signed 16-bit int"); + __ ld_ptr(method, AT, method_offset); + +#ifdef ASSERT + if (DebugVtables) { + Label L1; + __ beq(method, R0, L1); + __ delayed()->nop(); + __ ld(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L1); + __ delayed()->nop(); + __ stop("methodOop is null"); + __ bind(L1); + } +#endif // ASSERT + + // Rmethod: methodOop + // T0: receiver + // T9: entry point + address ame_addr = __ pc(); + __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); + __ jr(T9); + __ delayed()->nop(); + masm->flush(); + s->set_exception_points(npe_addr, ame_addr); + return s; +} + +// NOTE : whenever you change the code above, dont forget to change the const here +int VtableStub::pd_code_size_limit(bool is_vtable_stub) { + if (is_vtable_stub) { + return ( DebugVtables ? 600 : 28) + (CountCompiledCalls ? 24 : 0)+ + (UseCompressedOops ? 16 : 0); + } else { + return ( DebugVtables ? 636 : 152) + (CountCompiledCalls ? 24 : 0)+ + (UseCompressedOops ? 32 : 0); + } +} + +int VtableStub::pd_code_alignment() { + return wordSize; +} diff --git a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp index c1c053e66c3..5c90df1079f 100644 --- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp @@ -1513,6 +1513,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { } } +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { + ShouldNotReachHere(); +} + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { LIR_Opr src = op->in_opr(); LIR_Opr dest = op->result_opr(); @@ -2102,6 +2106,12 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L } +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, + LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + ShouldNotReachHere(); +} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp index 92b73e1c712..45da327efb7 100644 --- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp @@ -242,20 +242,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); } -void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { +template +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { __ cmp_mem_int(condition, base, disp, c, info); + __ branch(condition, T_INT, tgt); } +// Explicit instantiation for all supported types. 
+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { - __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); -} - - -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info) { +template +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); + __ branch(condition, type, tgt); } +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { if (tmp->is_valid() && c > 0 && c < max_jint) { diff --git a/hotspot/src/os/linux/vm/os_linux.cpp b/hotspot/src/os/linux/vm/os_linux.cpp index ba1bce4239a..42a73ea5aad 100644 --- a/hotspot/src/os/linux/vm/os_linux.cpp +++ b/hotspot/src/os/linux/vm/os_linux.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + // no precompiled headers #include "classfile/classLoader.hpp" #include "classfile/systemDictionary.hpp" @@ -1969,7 +1975,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {EM_ALPHA, EM_ALPHA, ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"}, {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"}, {EM_MIPS, EM_MIPS, ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"}, + {EM_MIPS, EM_MIPS, ELFCLASS64, ELFDATA2LSB, (char*)"MIPS64 LE"}, {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, +#if defined (LOONGARCH64) + {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LOONGARCH64"}, +#endif {EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, }; @@ -1984,6 +1994,8 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) static Elf32_Half running_arch_code=EM_SPARCV9; #elif (defined __sparc) && (!defined _LP64) static Elf32_Half running_arch_code=EM_SPARC; + #elif (defined MIPS64) + static Elf32_Half running_arch_code=EM_MIPS; #elif (defined __powerpc64__) static Elf32_Half running_arch_code=EM_PPC64; #elif (defined __powerpc__) @@ -2004,9 +2016,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) static Elf32_Half running_arch_code=EM_68K; #elif (defined AARCH64) static Elf32_Half running_arch_code=EM_AARCH64; + #elif (defined LOONGARCH64) + static Elf32_Half running_arch_code=EM_LOONGARCH; #else #error Method os::dll_load requires that one of following is defined:\ - IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K, AARCH64 + IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, __mips64, PARISC, M68K, AARCH64 #endif // Identify compatability class for VM's architecture and library's architecture @@ -3513,7 +3527,7 @@ size_t os::Linux::find_large_page_size() { #ifndef ZERO large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) - ARM_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M); + ARM_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M) MIPS64_ONLY(4 * M) LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. // TODO: LA #endif // ZERO FILE *fp = fopen("/proc/meminfo", "r"); @@ -5120,7 +5134,12 @@ jint os::init_2(void) Linux::fast_thread_clock_init(); // Allocate a single page and mark it as readable for safepoint polling +#ifdef OPT_SAFEPOINT + void * p = (void *)(0x10000); + address polling_page = (address) ::mmap(p, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); +#else address polling_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); +#endif guarantee( polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page" ); os::set_polling_page( polling_page ); @@ -5155,13 +5174,20 @@ jint os::init_2(void) // size. Add a page for compiler2 recursion in main thread. // Add in 2*BytesPerWord times page size to account for VM stack during // class initialization depending on 32 or 64 bit VM. + + /* + * 2014/1/2: JDK8 requires larger -Xss option. + * Some application cannot run with -Xss192K. + * We are not sure whether this causes errors, so simply print a warning. 
+ */ + size_t min_stack_allowed_jdk6 = os::Linux::min_stack_allowed; os::Linux::min_stack_allowed = MAX2(os::Linux::min_stack_allowed, (size_t)(StackYellowPages+StackRedPages+StackShadowPages) * Linux::page_size() + (2*BytesPerWord COMPILER2_PRESENT(+1)) * Linux::vm_default_page_size()); size_t threadStackSizeInBytes = ThreadStackSize * K; if (threadStackSizeInBytes != 0 && - threadStackSizeInBytes < os::Linux::min_stack_allowed) { + threadStackSizeInBytes < min_stack_allowed_jdk6) { tty->print_cr("\nThe stack size specified is too small, " "Specify at least %dk", os::Linux::min_stack_allowed/ K); diff --git a/hotspot/src/os/linux/vm/os_perf_linux.cpp b/hotspot/src/os/linux/vm/os_perf_linux.cpp index 0d1f75810af..cbc6c0757c3 100644 --- a/hotspot/src/os/linux/vm/os_perf_linux.cpp +++ b/hotspot/src/os/linux/vm/os_perf_linux.cpp @@ -50,6 +50,12 @@ #ifdef TARGET_ARCH_ppc # include "vm_version_ext_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vm_version_ext_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vm_version_ext_loongarch.hpp" +#endif #include #include diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp new file mode 100644 index 00000000000..5ee0965f426 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "runtime/os.hpp" +#include "runtime/threadLocalStorage.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +void MacroAssembler::get_thread(Register thread) { +#ifdef MINIMIZE_RAM_USAGE + Register tmp; + + if (thread == AT) + tmp = T9; + else + tmp = AT; + + move(thread, SP); + shr(thread, PAGE_SHIFT); + + push(tmp); + li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); + andr(thread, thread, tmp); + shl(thread, Address::times_ptr); // sizeof(Thread *) + li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); + add_d(tmp, tmp, thread); + ld_ptr(thread, tmp, 0); + pop(tmp); +#else + if (thread != V0) { + push(V0); + } + pushad_except_v0(); + + li(A0, ThreadLocalStorage::thread_index()); + push(S5); + move(S5, SP); + li(AT, -StackAlignmentInBytes); + andr(SP, SP, AT); + // TODO: confirm reloc + call(CAST_FROM_FN_PTR(address, pthread_getspecific), relocInfo::runtime_call_type); + move(SP, S5); + pop(S5); + + popad_except_v0(); + if (thread != V0) { + move(thread, V0); + pop(V0); + } +#endif // MINIMIZE_RAM_USAGE +} diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..69590ba5824 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp @@ -0,0 +1,206 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP + +#include "orderAccess_linux_loongarch.inline.hpp" +#include "runtime/atomic.hpp" +#include "runtime/os.hpp" +#include "vm_version_loongarch.hpp" + +// Implementation of class atomic + +inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } + +inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void**)dest = store_value; } + +inline jlong Atomic::load (volatile jlong* src) { return *src; } + +///////////implementation of Atomic::add*///////////////// +inline jint Atomic::add (jint add_value, volatile jint* dest) { + //TODO LA opt amadd + jint __ret, __tmp; + __asm__ __volatile__ ( + "1: ll.w %[__ret], %[__dest] \n\t" + " add.w %[__tmp], %[__val], %[__ret] \n\t" + " sc.w %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (add_value) + : "memory" + ); + + return add_value + __ret; +} + +inline intptr_t Atomic::add_ptr (intptr_t add_value, volatile intptr_t* dest) { + //TODO LA opt amadd + jint __ret, __tmp; + __asm__ __volatile__ ( + "1: ll.d %[__ret], %[__dest] \n\t" + " add.d %[__tmp], %[__val], %[__ret] \n\t" + " sc.d %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (add_value) + : "memory" + ); + + return add_value + __ret; +} + +inline void* Atomic::add_ptr (intptr_t add_value, volatile void* dest) { + return (void*)add_ptr((intptr_t)add_value, (volatile intptr_t*)dest); +} + +///////////implementation of Atomic::inc*///////////////// +inline void Atomic::inc (volatile jint* dest) { (void)add(1, dest); } +inline void Atomic::inc_ptr (volatile intptr_t* dest) { (void)add_ptr(1, dest); } +inline void Atomic::inc_ptr (volatile void* dest) { (void)inc_ptr((volatile intptr_t*)dest); } + +///////////implementation of Atomic::dec*///////////////// +inline void Atomic::dec (volatile jint* dest) { (void)add(-1, dest); } +inline void Atomic::dec_ptr (volatile intptr_t* dest) { (void)add_ptr(-1, dest); } +inline void Atomic::dec_ptr (volatile void* dest) { (void)dec_ptr((volatile intptr_t*)dest); } + + +///////////implementation of Atomic::xchg*///////////////// +inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) { + jint __ret, __tmp; + + __asm__ __volatile__ ( + "1: ll.w %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" + " sc.w %[__tmp], %[__dest] 
\n\t" + " beqz %[__tmp], 1b \n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) + : "memory" + ); + + return __ret; +} + +inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { + intptr_t __ret, __tmp; + __asm__ __volatile__ ( + "1: ll.d %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" + " sc.d %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) + : "memory" + ); + return __ret; + +} + +inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) { + return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest); +} + +///////////implementation of Atomic::cmpxchg*///////////////// +inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value) { + jint __prev, __cmp; + + __asm__ __volatile__ ( + "1: ll.w %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $r0 \n\t" + " move %[__cmp], %[__new] \n\t" + " sc.w %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" + " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + + return __prev; +} + +inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value) { + jlong __prev, __cmp; + + __asm__ __volatile__ ( + "1: ll.d %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $r0 \n\t" + " move %[__cmp], %[__new] \n\t" + " sc.d %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" + " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + return __prev; +} + +inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) { + intptr_t __prev, __cmp; + __asm__ __volatile__ ( + "1: ll.d %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $r0 \n\t" + " move %[__cmp], %[__new] \n\t" + " sc.d %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" + " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile intptr_t*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + + return __prev; +} + +inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) { + return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value); +} + +#endif // OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..4e205c468eb --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
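The contract of the LL/SC compare-and-swap loops above is: return the value observed at dest, and the store happened only if that value equals compare_value, with the dbar on the exit path providing conservative ordering. A rough, portable sketch of the same semantics using the GCC/Clang __atomic builtins (__ATOMIC_SEQ_CST chosen conservatively; this illustrates the contract, it is not the port's implementation):

#include <stdint.h>

// Returns the old value at dest; the exchange took place iff the return
// value equals compare_value.
inline int32_t cmpxchg32(int32_t exchange_value, volatile int32_t* dest, int32_t compare_value) {
  int32_t expected = compare_value;
  __atomic_compare_exchange_n(dest, &expected, exchange_value,
                              false /* strong */, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return expected;
}

inline int64_t cmpxchg64(int64_t exchange_value, volatile int64_t* dest, int64_t compare_value) {
  int64_t expected = compare_value;
  __atomic_compare_exchange_n(dest, &expected, exchange_value,
                              false /* strong */, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return expected;
}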
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP + +#include + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa. +inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } +inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } +inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } + +#endif // OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..7d6e11a9356 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP + +static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_words(from, to, count); +} + +static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { + pd_conjoint_bytes(from, to, count); +} + +static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); + copy_conjoint_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + +#endif // OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp new file mode 100644 index 00000000000..8ec3fa8239a --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); +define_pd_global(intx, ThreadStackSize, 2048); +define_pd_global(intx, VMThreadStackSize, 2048); + +define_pd_global(intx, CompilerThreadStackSize, 0); // 0 => use system default + +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx,HeapBaseMinAddress, 2*G); + +#endif // OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..3e050c8d094 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP + +#include "runtime/atomic.hpp" +#include "runtime/orderAccess.hpp" +#include "runtime/os.hpp" +#include "vm_version_loongarch.hpp" + +#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ + __asm__ __volatile__ ("nop" : : : "memory"); \ + else \ + __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); + +inline void OrderAccess::loadload() { inlasm_sync(0x15); } +inline void OrderAccess::storestore() { inlasm_sync(0x1a); } +inline void OrderAccess::loadstore() { inlasm_sync(0x16); } +inline void OrderAccess::storeload() { inlasm_sync(0x19); } + +inline void OrderAccess::acquire() { inlasm_sync(0x14); } +inline void OrderAccess::release() { inlasm_sync(0x12); } +inline void OrderAccess::fence() { inlasm_sync(0x10); } + +//implementation of load_acquire +inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; } +inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; } +inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; } +inline jlong OrderAccess::load_acquire(volatile jlong* p) { jlong tmp = *p; acquire(); return tmp; } +inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte data = *p; acquire(); return data; } +inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort data = *p; acquire(); return data; } +inline juint OrderAccess::load_acquire(volatile juint* p) { juint data = *p; acquire(); return data; } +inline julong OrderAccess::load_acquire(volatile julong* p) { julong tmp = *p; acquire(); return tmp; } +inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat data = *p; acquire(); return data; } +inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { jdouble tmp = *p; acquire(); return tmp; } + +//implementation of load_ptr_acquire +inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { intptr_t data = *p; acquire(); return data; } +inline void* OrderAccess::load_ptr_acquire(volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } +inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } + +//implementation of release_store +inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jlong* p, jlong v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jushort* p, jushort v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile juint* p, juint v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile julong* p, julong v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release(); *p = v; } + +//implementation of release_store_ptr +inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release(); *p = v; } +inline void OrderAccess::release_store_ptr(volatile void* p, 
void* v) { release(); *(void* volatile *)p = v; } + +//implementation of store_fence +inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } +inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } +inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } + +//implementation of store_ptr_fence +inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } +inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } + +//implementation of release_store_fence +inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } + +//implementaion of release_store_ptr_fence +inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } +inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } + +#undef inlasm_sync + +#endif // OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp new file mode 100644 index 00000000000..f2c3df84a1d --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp @@ -0,0 +1,750 @@ +/* + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// no precompiled headers +#include "asm/macroAssembler.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "jvm_linux.h" +#include "memory/allocation.inline.hpp" +#include "mutex_linux.inline.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm.h" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" +#include "utilities/debug.hpp" +#include "compiler/disassembler.hpp" +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +#define REG_SP 3 +#define REG_FP 22 + +address os::current_stack_pointer() { + register void *sp __asm__ ("$r3"); + return (address) sp; +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + + return (char*) -1; +} + +void os::initialize_thread(Thread* thr) { +// Nothing to do. +} + +address os::Linux::ucontext_get_pc(ucontext_t * uc) { + return (address)uc->uc_mcontext.__pc; +} + +intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; +} + +intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread +// is currently interrupted by SIGPROF. +// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal +// frames. Currently we don't do that on Linux, so it's the same as +// os::fetch_frame_from_context(). 
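ucontext_get_pc/sp/fp above read the interrupted context straight out of glibc's LoongArch mcontext_t: the __pc field plus __gregs[3] and __gregs[22] for SP and FP. A small standalone program that performs the same read from a signal handler; it assumes the glibc LoongArch64 ucontext layout used here and is a demonstration only (printf in a handler is not async-signal-safe):

#include <csignal>
#include <cstdio>
#include <ucontext.h>

static const int REG_SP_IDX = 3;   // $r3, matches REG_SP above
static const int REG_FP_IDX = 22;  // $r22, matches REG_FP above

static void dump_context(int, siginfo_t*, void* ucVoid) {
  ucontext_t* uc = static_cast<ucontext_t*>(ucVoid);
  std::printf("pc=%#lx sp=%#lx fp=%#lx\n",
              (unsigned long) uc->uc_mcontext.__pc,
              (unsigned long) uc->uc_mcontext.__gregs[REG_SP_IDX],
              (unsigned long) uc->uc_mcontext.__gregs[REG_FP_IDX]);
}

int main() {
  struct sigaction sa;
  sa.sa_sigaction = dump_context;
  sa.sa_flags = SA_SIGINFO;
  sigemptyset(&sa.sa_mask);
  sigaction(SIGUSR1, &sa, NULL);
  raise(SIGUSR1);   // handler prints the interrupted pc/sp/fp
  return 0;
}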
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, + ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + + assert(thread != NULL, "just checking"); + assert(ret_sp != NULL, "just checking"); + assert(ret_fp != NULL, "just checking"); + + return os::fetch_frame_from_context(uc, ret_sp, ret_fp); +} + +ExtendedPC os::fetch_frame_from_context(void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + ExtendedPC epc; + ucontext_t* uc = (ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); + if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); + } else { + // construct empty ExtendedPC for return value checking + epc = ExtendedPC(NULL); + if (ret_sp) *ret_sp = (intptr_t *)NULL; + if (ret_fp) *ret_fp = (intptr_t *)NULL; + } + + return epc; +} + +frame os::fetch_frame_from_context(void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); + return frame(sp, fp, epc.pc()); +} + +// By default, gcc always save frame pointer on stack. It may get +// turned off by -fomit-frame-pointer, +frame os::get_sender_for_C_frame(frame* fr) { + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); +} + +//intptr_t* _get_previous_fp() { +intptr_t* __attribute__((noinline)) os::get_previous_fp() { + return (intptr_t*)__builtin_frame_address(0); +} + +frame os::current_frame() { + intptr_t* fp = (intptr_t*)get_previous_fp(); + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); + if (os::is_first_C_frame(&myframe)) { + // stack is not walkable + return frame(); + } else { + return os::get_sender_for_C_frame(&myframe); + } +} + +extern "C" JNIEXPORT int +JVM_handle_linux_signal(int sig, + siginfo_t* info, + void* ucVoid, + int abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", + info->si_signo, + info->si_code, + info->si_errno, + info->si_addr); +#endif + + ucontext_t* uc = (ucontext_t*) ucVoid; + + Thread* t = ThreadLocalStorage::get_thread_slow(); + + SignalHandlerMark shm(t); + + // Note: it's not uncommon that JNI code uses signal/sigset to install + // then restore certain signal handler (e.g. to temporarily block SIGPIPE, + // or have a SIGILL handler when detecting CPU type). When that happens, + // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To + // avoid unnecessary crash when libjsig is not preloaded, try handle signals + // that do not require siginfo/ucontext first. 
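get_previous_fp() and current_frame() above depend on gcc keeping a frame pointer and on the saved {return address, caller fp} pair sitting just below it. A self-contained sketch of that walk; the fp[-1]/fp[-2] slot positions assume the LoongArch/MIPS-style frame layout this port relies on and code built with -fno-omit-frame-pointer, so treat it as illustrative rather than a general unwinder:

#include <cstddef>
#include <cstdio>

// Walk at most max_frames caller frames by chasing saved frame pointers.
__attribute__((noinline)) static void walk_frames(int max_frames) {
  void** fp = static_cast<void**>(__builtin_frame_address(0));
  for (int i = 0; i < max_frames && fp != NULL; i++) {
    void*  ra      = fp[-1];                       // saved return address
    void** prev_fp = static_cast<void**>(fp[-2]);  // caller's frame pointer
    std::printf("#%d  pc=%p  fp=%p\n", i, ra, (void*) fp);
    if (prev_fp <= fp) break;                      // stack grows down; a non-increasing
    fp = prev_fp;                                  // link means the chain has ended
  }
}

__attribute__((noinline)) static void leaf() { walk_frames(4); }

int main() {
  leaf();
  return 0;
}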
+ + if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { + // allow chained handler to go first + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } else { + if (PrintMiscellaneous && (WizardMode || Verbose)) { + warning("Ignoring SIGPIPE - see bug 4229104"); + } + return true; + } + } + + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { + if (t != NULL ){ + if(t->is_Java_thread()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a java thread"); +#endif + thread = (JavaThread*)t; + } + else if(t->is_VM_thread()){ +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a VM thread\n"); +#endif + vmthread = (VMThread *)t; + } + } + } + + // decide if this trap can be handled by a stub + address stub = NULL; + address pc = NULL; + + pc = (address) os::Linux::ucontext_get_pc(uc); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("pc=%lx", pc); + os::print_context(tty, uc); +#endif + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Linux::ucontext_get_pc(uc); + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + address addr = (address) info->si_addr; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("handle all stack overflow variations: "); + /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", + addr, + thread->stack_base(), + thread->stack_base() - thread->stack_size()); + */ +#endif + + // check if fault address is within thread stack + if (addr < thread->stack_base() && + addr >= thread->stack_base() - thread->stack_size()) { + // stack overflow +#ifdef PRINT_SIGNAL_HANDLE + tty->print("stack exception check \n"); +#endif + if (thread->in_stack_yellow_zone(addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in yellow zone\n"); +#endif + thread->disable_stack_yellow_zone(); + if (thread->thread_state() == _thread_in_Java) { + // Throw a stack overflow exception. Guard pages will be reenabled + // while unwinding the stack. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in java\n"); +#endif + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); + } else { + // Thread was in the vm or native code. Return and try to finish. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in vm or native codes and return\n"); +#endif + return 1; + } + } else if (thread->in_stack_red_zone(addr)) { + // Fatal red zone violation. Disable the guard pages and fall through + // to handle_unexpected_exception way down below. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in red zone\n"); +#endif + thread->disable_stack_red_zone(); + tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + + // This is a likely cause, but hard to verify. Let's just print + // it as a hint. + tty->print_raw_cr("Please check if any of your loaded .so files has " + "enabled executable stack (see man page execstack(8))"); + } else { + // Accessing stack address below sp may cause SEGV if current + // thread has MAP_GROWSDOWN stack. This should only happen when + // current thread was created by user code with MAP_GROWSDOWN flag + // and then attached to VM. See notes in os_linux.cpp. 
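The stack-overflow branches above boil down to classifying the faulting address against the thread's stack: off the stack entirely, in the red zone at the very bottom, in the yellow guard zone just above it, or in the normal stack. A tiny sketch of that classification; the page and zone sizes are placeholders (the VM derives them from StackRedPages/StackYellowPages and the real page size):

#include <stddef.h>
#include <stdint.h>

// Placeholder sizes, not the VM's actual values.
static const size_t PAGE_SZ     = 16 * 1024;
static const size_t RED_ZONE    = 1 * PAGE_SZ;
static const size_t YELLOW_ZONE = 2 * PAGE_SZ;

enum StackRegion { NOT_ON_STACK, RED_ZONE_HIT, YELLOW_ZONE_HIT, NORMAL_STACK };

// The stack occupies [stack_base - stack_size, stack_base); the red zone sits
// at the very bottom, the yellow zone directly above it.
static StackRegion classify(uintptr_t addr, uintptr_t stack_base, size_t stack_size) {
  uintptr_t bottom = stack_base - stack_size;
  if (addr >= stack_base || addr < bottom)       return NOT_ON_STACK;
  if (addr <  bottom + RED_ZONE)                 return RED_ZONE_HIT;
  if (addr <  bottom + RED_ZONE + YELLOW_ZONE)   return YELLOW_ZONE_HIT;
  return NORMAL_STACK;
}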
+#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is neither in yellow zone nor in the red one\n"); +#endif + if (thread->osthread()->expanding_stack() == 0) { + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); + } else { + fatal("recursive segv. expanding stack."); + } + } + } + } // sig == SIGSEGV + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub +#ifdef PRINT_SIGNAL_HANDLE + tty->print("java thread running in java code\n"); +#endif + + // Handle signal from NativeJump::patch_verified_entry(). + if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); +#endif + stub = SharedRuntime::get_handle_wrong_method_stub(); + } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); +#endif + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault + // here if the underlying file has been truncated. + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("cb = %lx, nm = %lx\n", cb, nm); +#endif + if (nm != NULL && nm->has_unsafe_access()) { + stub = StubRoutines::handler_for_unsafe_access(); + } + } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { + // HACK: si_code does not work on linux 2.2.12-20!!! + int op = pc[0] & 0x3f; + int op1 = pc[3] & 0x3f; + //FIXME, Must port to LA code!! + switch (op) { + case 0x1e: //ddiv + case 0x1f: //ddivu + case 0x1a: //div + case 0x1b: //divu + case 0x34: //trap + // In LA, div_by_zero exception can only be triggered by explicit 'trap'. + stub = SharedRuntime::continuation_for_implicit_exception(thread, + pc, + SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); + break; + default: + // TODO: handle more cases if we are using other x86 instructions + // that can generate SIGFPE signal on linux. + tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); + //fatal("please update this code."); + } + } else if (sig == SIGSEGV && + !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("continuation for implicit exception\n"); +#endif + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); +#endif + } + } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("SIGBUS in vm thread \n"); +#endif + stub = StubRoutines::handler_for_unsafe_access(); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. 
+ if ((sig == SIGSEGV) || (sig == SIGBUS)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("jni fast get trap: "); +#endif + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; + } +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("addr = %d, stub = %lx", addr, stub); +#endif + } + + // Check to see if we caught the safepoint code in the + // process of write protecting the memory serialization page. + // It write enables the page immediately after protecting it + // so we can just return to retry the write. + if ((sig == SIGSEGV) && + os::is_memory_serialize_page(thread, (address) info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("write protecting the memory serialiazation page\n"); +#endif + // Block current thread until the memory serialize page permission restored. + os::block_on_serialize_page_trap(); + return true; + } + } + + if (stub != NULL) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("resolved stub=%lx\n",stub); +#endif + // save all thread context in case we need to restore it + if (thread != NULL) thread->set_saved_exception_pc(pc); + + uc->uc_mcontext.__pc = (greg_t)stub; + return true; + } + + // signal-chaining + if (os::Linux::chained_handler(sig, info, ucVoid)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("signal chaining\n"); +#endif + return true; + } + + if (!abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("abort becauce of unrecognized\n"); +#endif + // caller wants another chance, so give it to him + return false; + } + + if (pc == NULL && uc != NULL) { + pc = os::Linux::ucontext_get_pc(uc); + } + + // unmask current signal + sigset_t newset; + sigemptyset(&newset); + sigaddset(&newset, sig); + sigprocmask(SIG_UNBLOCK, &newset, NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("VMError in signal handler\n"); +#endif + VMError err(t, sig, pc, info, ucVoid); + err.report_and_die(); + + ShouldNotReachHere(); + return true; // Mute compiler +} + +void os::Linux::init_thread_fpu_state(void) { +} + +int os::Linux::get_fpu_control_word(void) { + return 0; // mute compiler +} + +void os::Linux::set_fpu_control_word(int fpu_control) { +} + +bool os::is_allocatable(size_t bytes) { + + if (bytes < 2 * G) { + return true; + } + + char* addr = reserve_memory(bytes, NULL); + + if (addr != NULL) { + release_memory(addr, bytes); + } + + return addr != NULL; +} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +size_t os::Linux::min_stack_allowed = 96 * K; + +// Test if pthread library can support variable thread stack size. LinuxThreads +// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads +// in floating stack mode and NPTL support variable stack size. +bool os::Linux::supports_variable_stack_size() { + if (os::Linux::is_NPTL()) { + // NPTL, yes + return true; + + } else { + // Note: We can't control default stack size when creating a thread. + // If we use non-default stack size (pthread_attr_setstacksize), both + // floating stack and non-floating stack LinuxThreads will return the + // same value. This makes it impossible to implement this function by + // detecting thread stack size directly. + // + // An alternative approach is to check %gs. Fixed-stack LinuxThreads + // do not use %gs, so its value is 0. Floating-stack LinuxThreads use + // %gs (either as LDT selector or GDT selector, depending on kernel) + // to access thread specific data. 
+ // + // Note that %gs is a reserved glibc register since early 2001, so + // applications are not allowed to change its value (Ulrich Drepper from + // Redhat confirmed that all known offenders have been modified to use + // either %fs or TSD). In the worst case scenario, when VM is embedded in + // a native application that plays with %gs, we might see non-zero %gs + // even LinuxThreads is running in fixed stack mode. As the result, we'll + // return true and skip _thread_safety_check(), so we may not be able to + // detect stack-heap collisions. But otherwise it's harmless. + // + return false; + } +} + +// return default stack size for thr_type +size_t os::Linux::default_stack_size(os::ThreadType thr_type) { + // default stack size (compiler thread needs larger stack) + size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); + return s; +} + +size_t os::Linux::default_guard_size(os::ThreadType thr_type) { + // Creating guard page is very expensive. Java thread has HotSpot + // guard page, only enable glibc guard page for non-Java threads. + return (thr_type == java_thread ? 0 : page_size()); +} + +// Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ JavaThread created by VM does not have glibc +// | glibc guard page | - guard, attached Java thread usually has +// | |/ 1 page glibc guard. +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | HotSpot Guard Pages | - red and yellow pages +// | |/ +// +------------------------+ JavaThread::stack_yellow_zone_base() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// Non-Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ +// | glibc guard page | - usually 1 page +// | |/ +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from +// pthread_attr_getstack() + +static void current_stack_region(address * bottom, size_t * size) { + if (os::is_primordial_thread()) { + // primordial thread needs special handling because pthread_getattr_np() + // may return bogus value. 
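For every thread except the primordial one, the code that follows asks glibc for the exact stack bounds via pthread_getattr_np() and pthread_attr_getstack(). A standalone example of the same query (pthread_getattr_np is a GNU extension; compile with -pthread on Linux/glibc):

#include <cstddef>
#include <cstdio>
#include <pthread.h>

// Print the current thread's stack bottom (lowest address), size and top,
// exactly as reported by glibc.
static void* print_stack_region(void*) {
  pthread_attr_t attr;
  if (pthread_getattr_np(pthread_self(), &attr) != 0) {
    std::perror("pthread_getattr_np");
    return NULL;
  }
  void*  bottom = NULL;
  size_t size   = 0;
  if (pthread_attr_getstack(&attr, &bottom, &size) == 0) {
    std::printf("stack: bottom=%p size=%zu top=%p\n",
                bottom, size, (void*) ((char*) bottom + size));
  }
  pthread_attr_destroy(&attr);
  return NULL;
}

int main() {
  pthread_t t;
  pthread_create(&t, NULL, print_stack_region, NULL);  // query from a pthread-created thread
  pthread_join(t, NULL);
  return 0;
}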
+ *bottom = os::Linux::initial_thread_stack_bottom(); + *size = os::Linux::initial_thread_stack_size(); + } else { + pthread_attr_t attr; + + int rslt = pthread_getattr_np(pthread_self(), &attr); + + // JVM needs to know exact stack location, abort if it fails + if (rslt != 0) { + if (rslt == ENOMEM) { + vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); + } else { + fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt)); + } + } + + if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { + fatal("Can not locate current stack attributes!"); + } + + pthread_attr_destroy(&attr); + + } + assert(os::current_stack_pointer() >= *bottom && + os::current_stack_pointer() < *bottom + *size, "just checking"); +} + +address os::current_stack_base() { + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return (bottom + size); +} + +size_t os::current_stack_size() { + // stack size includes normal stack and HotSpot guard pages + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return size; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler +void os::print_register_info(outputStream *st, void *context) { + + ucontext_t *uc = (ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + // this is horrendously verbose but the layout of the registers in the + // // context does not match how we defined our abstract Register set, so + // // we can't just iterate through the gregs area + // + // // this is only for the "general purpose" registers + st->print("ZERO=" ); print_location(st, uc->uc_mcontext.__gregs[0]); + st->print("RA=" ); print_location(st, uc->uc_mcontext.__gregs[1]); + st->print("TP=" ); print_location(st, uc->uc_mcontext.__gregs[2]); + st->print("SP=" ); print_location(st, uc->uc_mcontext.__gregs[3]); + st->cr(); + st->print("A0=" ); print_location(st, uc->uc_mcontext.__gregs[4]); + st->print("A1=" ); print_location(st, uc->uc_mcontext.__gregs[5]); + st->print("A2=" ); print_location(st, uc->uc_mcontext.__gregs[6]); + st->print("A3=" ); print_location(st, uc->uc_mcontext.__gregs[7]); + st->cr(); + st->print("A4=" ); print_location(st, uc->uc_mcontext.__gregs[8]); + st->print("A5=" ); print_location(st, uc->uc_mcontext.__gregs[9]); + st->print("A6=" ); print_location(st, uc->uc_mcontext.__gregs[10]); + st->print("A7=" ); print_location(st, uc->uc_mcontext.__gregs[11]); + st->cr(); + st->print("T0=" ); print_location(st, uc->uc_mcontext.__gregs[12]); + st->print("T1=" ); print_location(st, uc->uc_mcontext.__gregs[13]); + st->print("T2=" ); print_location(st, uc->uc_mcontext.__gregs[14]); + st->print("T3=" ); print_location(st, uc->uc_mcontext.__gregs[15]); + st->cr(); + st->print("T4=" ); print_location(st, uc->uc_mcontext.__gregs[16]); + st->print("T5=" ); print_location(st, uc->uc_mcontext.__gregs[17]); + st->print("T6=" ); print_location(st, uc->uc_mcontext.__gregs[18]); + st->print("T7=" ); print_location(st, uc->uc_mcontext.__gregs[19]); + st->cr(); + st->print("T8=" ); print_location(st, uc->uc_mcontext.__gregs[20]); + st->print("RX=" ); print_location(st, uc->uc_mcontext.__gregs[21]); + st->print("FP=" ); print_location(st, uc->uc_mcontext.__gregs[22]); + st->print("S0=" ); print_location(st, uc->uc_mcontext.__gregs[23]); + st->cr(); + st->print("S1=" ); print_location(st, uc->uc_mcontext.__gregs[24]); + st->print("S2=" ); print_location(st, uc->uc_mcontext.__gregs[25]); + st->print("S3=" ); 
print_location(st, uc->uc_mcontext.__gregs[26]); + st->print("S4=" ); print_location(st, uc->uc_mcontext.__gregs[27]); + st->cr(); + st->print("S5=" ); print_location(st, uc->uc_mcontext.__gregs[28]); + st->print("S6=" ); print_location(st, uc->uc_mcontext.__gregs[29]); + st->print("S7=" ); print_location(st, uc->uc_mcontext.__gregs[30]); + st->print("S8=" ); print_location(st, uc->uc_mcontext.__gregs[31]); + st->cr(); + +} +void os::print_context(outputStream *st, void *context) { + + ucontext_t *uc = (ucontext_t*)context; + st->print_cr("Registers:"); + st->print( "ZERO=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); + st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); + st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); + st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); + st->cr(); + st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); + st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); + st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); + st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); + st->cr(); + st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); + st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); + st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); + st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); + st->cr(); + st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); + st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); + st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); + st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); + st->cr(); + st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); + st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); + st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); + st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); + st->cr(); + st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); + st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); + st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); + st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); + st->cr(); + st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); + st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); + st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); + st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); + st->cr(); + st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); + st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); + st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); + st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); + st->cr(); + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); + //print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t)); + print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. 
+ address pc = os::Linux::ucontext_get_pc(uc); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); + Disassembler::decode(pc - 80, pc + 80, st); +} + +void os::setup_fpu() { + // no use for LA +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif + +bool os::is_ActiveCoresMP() { + return UseActiveCoresMP && _initial_active_processor_count == 1; +} diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp new file mode 100644 index 00000000000..a7321ae0253 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP + + static void setup_fpu(); + static bool is_allocatable(size_t bytes); + static intptr_t *get_previous_fp(); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations + static bool register_code_area(char *low, char *high) { return true; } + + static bool is_ActiveCoresMP(); + +#endif // OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..a1cedcd8cf7 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP + + +inline void Prefetch::read (void *loc, intx interval) { +// According to previous and present SPECjbb2015 score, +// comment prefetch is better than if (interval >= 0) prefetch branch. +// So choose comment prefetch as the base line. +#if 0 + __asm__ __volatile__ ( + " preld 0, %[__loc] \n" + : + : [__loc] "m"( *((address)loc + interval) ) + : "memory" + ); +#endif +} + +inline void Prefetch::write(void *loc, intx interval) { +// Ditto +#if 0 + __asm__ __volatile__ ( + " preld 8, %[__loc] \n" + : + : [__loc] "m"( *((address)loc + interval) ) + : "memory" + ); +#endif +} + +#endif // OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp new file mode 100644 index 00000000000..be28a562a1e --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/threadLocalStorage.hpp" + +// Map stack pointer (%esp) to thread pointer for faster TLS access +// +// Here we use a flat table for better performance. Getting current thread +// is down to one memory access (read _sp_map[%esp>>12]) in generated code +// and two in runtime code (-fPIC code needs an extra load for _sp_map). +// +// This code assumes stack page is not shared by different threads. It works +// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters). +// +// Notice that _sp_map is allocated in the bss segment, which is ZFOD +// (zero-fill-on-demand). 
While it reserves 4M address space upfront, +// actual memory pages are committed on demand. +// +// If an application creates and destroys a lot of threads, usually the +// stack space freed by a thread will soon get reused by new thread +// (this is especially true in NPTL or LinuxThreads in fixed-stack mode). +// No memory page in _sp_map is wasted. +// +// However, it's still possible that we might end up populating & +// committing a large fraction of the 4M table over time, but the actual +// amount of live data in the table could be quite small. The max wastage +// is less than 4M bytes. If it becomes an issue, we could use madvise() +// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map. +// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the +// physical memory page (i.e. similar to MADV_FREE on Solaris). + +#ifdef MINIMIZE_RAM_USAGE +Thread* ThreadLocalStorage::_sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; +#endif // MINIMIZE_RAM_USAGE + +void ThreadLocalStorage::generate_code_for_get_thread() { + // nothing we can do here for user-level thread +} + +void ThreadLocalStorage::pd_init() { +#ifdef MINIMIZE_RAM_USAGE + assert(align_size_down(os::vm_page_size(), PAGE_SIZE) == os::vm_page_size(), + "page size must be multiple of PAGE_SIZE"); +#endif // MINIMIZE_RAM_USAGE +} + +void ThreadLocalStorage::pd_set_thread(Thread* thread) { + os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); +#ifdef MINIMIZE_RAM_USAGE + address stack_top = os::current_stack_base(); + size_t stack_size = os::current_stack_size(); + + for (address p = stack_top - stack_size; p < stack_top; p += PAGE_SIZE) { + int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); + assert(thread == NULL || _sp_map[index] == NULL || thread == _sp_map[index], + "thread exited without detaching from VM??"); + _sp_map[index] = thread; + } +#endif // MINIMIZE_RAM_USAGE +} diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp new file mode 100644 index 00000000000..4fab788a75d --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP + +#ifdef MINIMIZE_RAM_USAGE + // Processor dependent parts of ThreadLocalStorage + //only the low 2G space for user program in Linux + + #define SP_BITLENGTH 34 + #define PAGE_SHIFT 14 + #define PAGE_SIZE (1UL << PAGE_SHIFT) + + static Thread* _sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; + static int _sp_map_low; + static int _sp_map_high; +#endif // MINIMIZE_RAM_USAGE + +public: +#ifdef MINIMIZE_RAM_USAGE + static Thread** sp_map_addr() { return _sp_map; } +#endif // MINIMIZE_RAM_USAGE + + static Thread* thread() { +#ifdef MINIMIZE_RAM_USAGE + /* Thread::thread() can also be optimized in the same way as __get_thread() */ + //return (Thread*) os::thread_local_storage_at(thread_index()); + uintptr_t sp; + uintptr_t mask = (1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1; + + __asm__ __volatile__ ("addi.d %0, $r29, 0 " : "=r" (sp)); + + return _sp_map[(sp >> PAGE_SHIFT) & mask]; +#else + return (Thread*) os::thread_local_storage_at(thread_index()); +#endif // MINIMIZE_RAM_USAGE + } +#endif // OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp new file mode 100644 index 00000000000..44f666d61f3 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/sharedRuntime.hpp" + +void JavaThread::pd_initialize() +{ + _anchor.clear(); +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, + void* ucontext, bool isInJava) { + + assert(Thread::current() == this, "caller must be current thread"); + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + assert(this->is_Java_thread(), "must be JavaThread"); + JavaThread* jt = (JavaThread *)this; + + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. + if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { + *fr_addr = jt->pd_last_frame(); + return true; + } + + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. + if (isInJava) { + ucontext_t* uc = (ucontext_t*) ucontext; + + intptr_t* ret_fp; + intptr_t* ret_sp; + ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, + &ret_sp, &ret_fp); + if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(jt)) { +#ifdef COMPILER2 + // C2 uses ebp as a general register see if NULL fp helps + frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(jt)) { + // nothing else to try if the frame isn't good + return false; + } + ret_frame = ret_frame2; +#else + // nothing else to try if the frame isn't good + return false; +#endif /* COMPILER2 */ + } + *fr_addr = ret_frame; + return true; + } + + // nothing else to try + return false; +} + +void JavaThread::cache_global_variables() { } + diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp new file mode 100644 index 00000000000..d6dd2521f42 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP + + private: + void pd_initialize(); + + frame pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + if (_anchor.last_Java_pc() != NULL) { + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); + } else { + // This will pick up pc from sp + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); + } + } + + + public: + // Mutators are highly dangerous.... + intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } + void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + + intptr_t* base_of_stack_pointer() { + return NULL; + } + void record_base_of_stack_pointer() { + } + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); +public: + + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). + static bool register_stack_overflow() { return false; } + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + +#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp new file mode 100644 index 00000000000..0097cadcb7a --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, pid_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(pid_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp new file mode 100644 index 00000000000..80a1538de9f --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/os.hpp" +#include "vm_version_loongarch.hpp" + diff --git a/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp new file mode 100644 index 00000000000..4ba53d9341d --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "runtime/os.hpp" +#include "runtime/threadLocalStorage.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +void MacroAssembler::get_thread(Register thread) { +#ifdef MINIMIZE_RAM_USAGE +// +// In MIPS64, we don't use full 64-bit address space. +// Only a small range is actually used. +// +// Example: +// $ cat /proc/13352/maps +// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java +// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java +// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] +// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so +// 555d598000-555d59c000 rw-p 00000000 00:00 0 +// ...... +// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 +// 558b23c000-558b248000 ---p 00000000 00:00 0 +// 558b248000-558b28c000 rwxp 00000000 00:00 0 +// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] +// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] +// +// All stacks are positioned at 0x55________. +// Therefore, we can utilize the same algorithm used in 32-bit. + // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); + // Thread* thread = _sp_map[index]; + Register tmp; + + if (thread == AT) + tmp = T9; + else + tmp = AT; + + move(thread, SP); + shr(thread, PAGE_SHIFT); + + push(tmp); + li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); + andr(thread, thread, tmp); + shl(thread, Address::times_ptr); // sizeof(Thread *) + li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); + addu(tmp, tmp, thread); + ld_ptr(thread, tmp, 0); + pop(tmp); +#else + if (thread != V0) { + push(V0); + } + pushad_except_v0(); + + move(A0, ThreadLocalStorage::thread_index()); + push(S5); + move(S5, SP); + move(AT, -StackAlignmentInBytes); + andr(SP, SP, AT); + call(CAST_FROM_FN_PTR(address, pthread_getspecific)); + delayed()->nop(); + move(SP, S5); + pop(S5); + + popad_except_v0(); + if (thread != V0) { + move(thread, V0); + pop(V0); + } +#endif // MINIMIZE_RAM_USAGE +} diff --git a/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp new file mode 100644 index 00000000000..1c7ad605e95 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp @@ -0,0 +1,258 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP +#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP + +#include "orderAccess_linux_mips.inline.hpp" +#include "runtime/atomic.hpp" +#include "runtime/os.hpp" +#include "vm_version_mips.hpp" + +// Implementation of class atomic + +inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } + +inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void**)dest = store_value; } + +inline jlong Atomic::load (volatile jlong* src) { return *src; } + +///////////implementation of Atomic::add*///////////////// +inline jint Atomic::add (jint add_value, volatile jint* dest) { + jint __ret, __tmp; + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1: sync \n\t" + " ll %[__ret], %[__dest] \n\t" + " addu %[__tmp], %[__val], %[__ret] \n\t" + " sc %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + " nop \n\t" + + " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (add_value) + : "memory" + ); + + return add_value + __ret; +} + +inline intptr_t Atomic::add_ptr (intptr_t add_value, volatile intptr_t* dest) { + jint __ret, __tmp; + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1: sync \n\t" + " lld %[__ret], %[__dest] \n\t" + " daddu %[__tmp], %[__val], %[__ret] \n\t" + " scd %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + " nop \n\t" + + " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(volatile jint*)dest), 
[__val] "r" (add_value) + : "memory" + ); + + return add_value + __ret; +} + +inline void* Atomic::add_ptr (intptr_t add_value, volatile void* dest) { + return (void*)add_ptr((intptr_t)add_value, (volatile intptr_t*)dest); +} + +///////////implementation of Atomic::inc*///////////////// +inline void Atomic::inc (volatile jint* dest) { (void)add(1, dest); } +inline void Atomic::inc_ptr (volatile intptr_t* dest) { (void)add_ptr(1, dest); } +inline void Atomic::inc_ptr (volatile void* dest) { (void)inc_ptr((volatile intptr_t*)dest); } + +///////////implementation of Atomic::dec*///////////////// +inline void Atomic::dec (volatile jint* dest) { (void)add(-1, dest); } +inline void Atomic::dec_ptr (volatile intptr_t* dest) { (void)add_ptr(-1, dest); } +inline void Atomic::dec_ptr (volatile void* dest) { (void)dec_ptr((volatile intptr_t*)dest); } + + +///////////implementation of Atomic::xchg*///////////////// +inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) { + jint __ret, __tmp; + + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1: sync\n\t" + " ll %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" + " sc %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + " nop \n\t" + + " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) + : "memory" + ); + + return __ret; +} + +inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { + intptr_t __ret, __tmp; + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1: sync\n\t" + " lld %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" + " scd %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + " nop \n\t" + + " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) + : "memory" + ); + return __ret; +} + +inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) { + return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest); +} + +///////////implementation of Atomic::cmpxchg*///////////////// +inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value) { + jint __prev, __cmp; + + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1:sync \n\t" + " ll %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $0 \n\t" + " move %[__cmp], %[__new] \n\t" + " sc %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + " nop \n\t" + "2: \n\t" + " sync \n\t" + + " .set pop\n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + + return __prev; +} + +inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value) { + jlong __prev, __cmp; + + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1:sync \n\t" + " lld %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $0 \n\t" + " move %[__cmp], %[__new] \n\t" + " scd %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + " nop \n\t" + "2: \n\t" + " sync \n\t" + + " .set pop\n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + 
return __prev;
+}
+
+inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) {
+ intptr_t __prev, __cmp;
+ __asm__ __volatile__ (
+ " .set push \n\t"
+ " .set mips64\n\t\t"
+ " .set noreorder\n\t"
+
+ "1:sync \n\t"
+ " lld %[__prev], %[__dest] \n\t"
+ " bne %[__prev], %[__old], 2f \n\t"
+ " move %[__cmp], $0 \n\t"
+ " move %[__cmp], %[__new] \n\t"
+ " scd %[__cmp], %[__dest] \n\t"
+ " beqz %[__cmp], 1b \n\t"
+ " nop \n\t"
+ "2: \n\t"
+ " sync \n\t"
+ " .set pop \n\t"
+
+ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp)
+ : [__dest] "m" (*(volatile intptr_t*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value)
+ : "memory"
+ );
+
+ return __prev;
+}
+
+inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) {
+ return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value);
+}
+
+#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP
diff --git a/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp
new file mode 100644
index 00000000000..5b5cd10aa55
--- /dev/null
+++ b/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP
+#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP
+
+#include <byteswap.h>
+
+// Efficient swapping of data bytes from Java byte
+// ordering to native byte ordering and vice versa.
+inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); }
+inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); }
+inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); }
+
+#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP
diff --git a/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp
new file mode 100644
index 00000000000..73ac34501bc
--- /dev/null
+++ b/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP + +static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_words(from, to, count); +} + +static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { + pd_conjoint_bytes(from, to, count); +} + +static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); + copy_conjoint_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void 
pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + +#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp new file mode 100644 index 00000000000..f1599ac5f17 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); +#ifdef MIPS64 +define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default +define_pd_global(intx, VMThreadStackSize, 1024); +#else +// ThreadStackSize 320 allows a couple of test cases to run while +// keeping the number of threads that can be created high. System +// default ThreadStackSize appears to be 512 which is too big. +define_pd_global(intx, ThreadStackSize, 320); +define_pd_global(intx, VMThreadStackSize, 512); +#endif // MIPS64 + +define_pd_global(intx, CompilerThreadStackSize, 0); + +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx,HeapBaseMinAddress, 2*G); + +#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad new file mode 100644 index 00000000000..5e38996ffa3 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad @@ -0,0 +1,153 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// mips32/godson2 Linux Architecture Description File + +//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- +// This block specifies the encoding classes used by the compiler to +// output byte streams. Encoding classes generate functions which are +// called by Machine Instruction Nodes in order to generate the bit +// encoding of the instruction. Operands specify their base encoding +// interface with the interface keyword. There are currently +// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & +// COND_INTER. REG_INTER causes an operand to generate a function +// which returns its register number when queried. CONST_INTER causes +// an operand to generate a function which returns the value of the +// constant when queried. MEMORY_INTER causes an operand to generate +// four functions which return the Base Register, the Index Register, +// the Scale Value, and the Offset Value of the operand when queried. +// COND_INTER causes an operand to generate six functions which return +// the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional +// instruction. Instructions specify two basic values for encoding. +// They use the ins_encode keyword to specify their encoding class +// (which must be one of the class names specified in the encoding +// block), and they use the opcode keyword to specify, in order, their +// primary, secondary, and tertiary opcode. Only the opcode sections +// which a particular instruction needs for encoding need to be +// specified. +encode %{ + // Build emit functions for each basic byte or larger field in the intel + // encoding scheme (opcode, rm, sib, immediate), and call them from C++ + // code in the enc_class source block. Emit functions will live in the + // main source block for now. 
In future, we can generalize this by + // adding a syntax that specifies the sizes of fields in an order, + // so that the adlc can build the emit functions automagically + + enc_class linux_breakpoint + %{ + MacroAssembler* masm = new MacroAssembler(&cbuf); + masm->call(CAST_FROM_FN_PTR(address, os::breakpoint), relocInfo::runtime_call_type); + %} + + enc_class call_epilog + %{ + if (VerifyStackAtCalls) { + // Check that stack depth is unchanged: find majik cookie on stack + int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP,-2)); + if(framesize >= 128) { + emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood + emit_d8(cbuf,0xBC); + emit_d8(cbuf,0x24); + emit_d32(cbuf,framesize); // Find majik cookie from ESP + emit_d32(cbuf, 0xbadb100d); + } + else { + emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood + emit_d8(cbuf,0x7C); + emit_d8(cbuf,0x24); + emit_d8(cbuf,framesize); // Find majik cookie from ESP + emit_d32(cbuf, 0xbadb100d); + } + // jmp EQ around INT3 + // QQQ TODO + const int jump_around = 5; // size of call to breakpoint, 1 for CC + emit_opcode(cbuf, 0x74); + emit_d8(cbuf, jump_around); + // QQQ temporary + emit_break(cbuf); + // Die if stack mismatch + // emit_opcode(cbuf,0xCC); + } + %} + +%} + +// INSTRUCTIONS -- Platform dependent + +//----------OS and Locking Instructions---------------------------------------- + +// This name is KNOWN by the ADLC and cannot be changed. +// The ADLC forces a 'TypeRawPtr::BOTTOM' output type +// for this guy. +instruct tlsLoadP(eAXRegP dst, eFlagsReg cr) %{ +%{ + match(Set dst (ThreadLocal)); + effect(DEF dst, KILL cr); + + format %{ "MOV EAX, Thread::current()" %} + ins_encode( linux_tlsencode(dst) ); + ins_pipe( ialu_reg_fat ); +%} + +// Die now +instruct ShouldNotReachHere() +%{ + match(Halt); + + // Use the following format syntax + format %{ "int3\t# ShouldNotReachHere" %} + // QQQ TODO for now call breakpoint + // opcode(0xCC); + // ins_encode(Opc); + ins_encode(linux_breakpoint); + ins_pipe(pipe_slow); +%} + + +// Platform dependent source + +source +%{ +// emit an interrupt that is caught by the debugger +void emit_break(CodeBuffer& cbuf) { + // Debugger doesn't really catch this but best we can do so far QQQ +#define __ masm. + __ lui(T9, Assembler::split_high((int)os::breakpoint)); + __ addiu(T9, T9, Assembler::split_low((int)os::breakpoint)); + __ jalr(T9); + __ delayed()->nop(); +} + +void MachBreakpointNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + emit_break(cbuf); +} + +uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { + //return 5; + return 16; +} + +%} diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s new file mode 100644 index 00000000000..f87fbf265d7 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2017, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + + diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad b/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad new file mode 100644 index 00000000000..ca4d094738b --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad @@ -0,0 +1,50 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// AMD64 Linux Architecture Description File + +//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- +// This block specifies the encoding classes used by the compiler to +// output byte streams. Encoding classes generate functions which are +// called by Machine Instruction Nodes in order to generate the bit +// encoding of the instruction. Operands specify their base encoding +// interface with the interface keyword. There are currently +// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & +// COND_INTER. REG_INTER causes an operand to generate a function +// which returns its register number when queried. CONST_INTER causes +// an operand to generate a function which returns the value of the +// constant when queried. MEMORY_INTER causes an operand to generate +// four functions which return the Base Register, the Index Register, +// the Scale Value, and the Offset Value of the operand when queried. +// COND_INTER causes an operand to generate six functions which return +// the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional +// instruction. Instructions specify two basic values for encoding. +// They use the ins_encode keyword to specify their encoding class +// (which must be one of the class names specified in the encoding +// block), and they use the opcode keyword to specify, in order, their +// primary, secondary, and tertiary opcode. Only the opcode sections +// which a particular instruction needs for encoding need to be +// specified. 
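The encoding-block description above is easiest to follow next to a concrete pairing of an encoding class and an instruction that selects it. The fragment below is only an illustrative sketch, not part of the port: the names sketch_breakpoint and sketchHalt are hypothetical, and the emission call simply reuses the MacroAssembler::call(..., relocInfo::runtime_call_type) form already used by the linux_breakpoint encoding class in linux_mips.ad above.

    encode %{
      // Hypothetical encoding class: emit a runtime call to os::breakpoint(),
      // mirroring the linux_breakpoint class shown earlier in linux_mips.ad.
      enc_class sketch_breakpoint
      %{
        // enc_class bodies are C++ fragments run at code-emission time; they
        // append instruction bytes to the current CodeBuffer (cbuf).
        MacroAssembler* masm = new MacroAssembler(&cbuf);
        masm->call(CAST_FROM_FN_PTR(address, os::breakpoint), relocInfo::runtime_call_type);
      %}
    %}

    // An instruction names its encoding class via ins_encode; the ADLC then
    // generates the emit function that the Mach node calls, as the comment
    // block above describes.
    instruct sketchHalt()
    %{
      match(Halt);
      format %{ "breakpoint\t# sketch only" %}
      ins_encode(sketch_breakpoint);
      ins_pipe(pipe_slow);
    %}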
diff --git a/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp new file mode 100644 index 00000000000..c9bc169aa5c --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP +#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP + +#include "runtime/atomic.hpp" +#include "runtime/orderAccess.hpp" +#include "runtime/os.hpp" +#include "vm_version_mips.hpp" + +#define inlasm_sync() if (os::is_ActiveCoresMP()) \ + __asm__ __volatile__ ("nop" : : : "memory"); \ + else \ + __asm__ __volatile__ ("sync" : : : "memory"); + +inline void OrderAccess::loadload() { inlasm_sync(); } +inline void OrderAccess::storestore() { inlasm_sync(); } +inline void OrderAccess::loadstore() { inlasm_sync(); } +inline void OrderAccess::storeload() { inlasm_sync(); } + +inline void OrderAccess::acquire() { inlasm_sync(); } +inline void OrderAccess::release() { inlasm_sync(); } +inline void OrderAccess::fence() { inlasm_sync(); } + +//implementation of load_acquire +inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; } +inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; } +inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; } +inline jlong OrderAccess::load_acquire(volatile jlong* p) { jlong tmp = *p; acquire(); return tmp; } +inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte data = *p; acquire(); return data; } +inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort data = *p; acquire(); return data; } +inline juint OrderAccess::load_acquire(volatile juint* p) { juint data = *p; acquire(); return data; } +inline julong OrderAccess::load_acquire(volatile julong* p) { julong tmp = *p; acquire(); return tmp; } +inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat data = *p; acquire(); return data; } +inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { jdouble tmp = *p; acquire(); return tmp; } + +//implementation of load_ptr_acquire +inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { intptr_t data = *p; acquire(); return data; } +inline void* 
OrderAccess::load_ptr_acquire(volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } +inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } + +//implementation of release_store +inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jlong* p, jlong v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jushort* p, jushort v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile juint* p, juint v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile julong* p, julong v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release(); *p = v; } + +//implementation of release_store_ptr +inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release(); *p = v; } +inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { release(); *(void* volatile *)p = v; } + +//implementation of store_fence +inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } +inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } +inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } + +//implementation of store_ptr_fence +inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } +inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } + +//implementation of release_store_fence +inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } +inline void 
OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } + +//implementaion of release_store_ptr_fence +inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } +inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } + +#undef inlasm_sync + +#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp new file mode 100644 index 00000000000..43487dab98a --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp @@ -0,0 +1,1015 @@ +/* + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +// no precompiled headers +#include "asm/macroAssembler.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "jvm_linux.h" +#include "memory/allocation.inline.hpp" +#include "mutex_linux.inline.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm.h" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" +#include "utilities/debug.hpp" +#include "compiler/disassembler.hpp" +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +#define REG_SP 29 +#define REG_FP 30 + +address os::current_stack_pointer() { + register void *sp __asm__ ("$29"); + return (address) sp; +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + + return (char*) -1; +} + +void os::initialize_thread(Thread* thr) { +// Nothing to do. +} + +address os::Linux::ucontext_get_pc(ucontext_t * uc) { + //return (address)uc->uc_mcontext.gregs[REG_PC]; + return (address)uc->uc_mcontext.pc; +} + +intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; +} + +intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread +// is currently interrupted by SIGPROF. +// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal +// frames. Currently we don't do that on Linux, so it's the same as +// os::fetch_frame_from_context(). 
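+// On linux_mips this simply asserts on its arguments and delegates to
+// os::fetch_frame_from_context() below; the SIGPROF path in
+// thread_linux_mips.cpp (JavaThread::pd_get_top_frame) uses it to recover
+// sp/fp/pc from the interrupted thread's ucontext.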
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, + ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + + assert(thread != NULL, "just checking"); + assert(ret_sp != NULL, "just checking"); + assert(ret_fp != NULL, "just checking"); + + return os::fetch_frame_from_context(uc, ret_sp, ret_fp); +} + +ExtendedPC os::fetch_frame_from_context(void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + ExtendedPC epc; + ucontext_t* uc = (ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); + if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); + } else { + // construct empty ExtendedPC for return value checking + epc = ExtendedPC(NULL); + if (ret_sp) *ret_sp = (intptr_t *)NULL; + if (ret_fp) *ret_fp = (intptr_t *)NULL; + } + + return epc; +} + +frame os::fetch_frame_from_context(void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); + return frame(sp, fp, epc.pc()); +} + +// By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get +// turned off by -fomit-frame-pointer, +frame os::get_sender_for_C_frame(frame* fr) { + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); +} + +//intptr_t* _get_previous_fp() { +intptr_t* __attribute__((noinline)) os::get_previous_fp() { + int *pc; + intptr_t sp; + int *pc_limit = (int*)(void*)&os::get_previous_fp; + int insn; + + { + l_pc:; + pc = (int*)&&l_pc; + __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); + } + + do { + insn = *pc; + switch(bitfield(insn, 16, 16)) { + case 0x27bd: /* addiu $sp,$sp,-i */ + case 0x67bd: /* daddiu $sp,$sp,-i */ + assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); + sp -= (short)bitfield(insn, 0, 16); + return (intptr_t*)sp; + } + --pc; + } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. + + ShouldNotReachHere(); + return NULL; // mute compiler +} + + +frame os::current_frame() { + intptr_t* fp = (intptr_t*)get_previous_fp(); + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); + if (os::is_first_C_frame(&myframe)) { + // stack is not walkable + return frame(); + } else { + return os::get_sender_for_C_frame(&myframe); + } +} + +//x86 add 2 new assemble function here! +extern "C" JNIEXPORT int +JVM_handle_linux_signal(int sig, + siginfo_t* info, + void* ucVoid, + int abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", + info->si_signo, + info->si_code, + info->si_errno, + info->si_addr); +#endif + + ucontext_t* uc = (ucontext_t*) ucVoid; + + Thread* t = ThreadLocalStorage::get_thread_slow(); + + SignalHandlerMark shm(t); + + // Note: it's not uncommon that JNI code uses signal/sigset to install + // then restore certain signal handler (e.g. to temporarily block SIGPIPE, + // or have a SIGILL handler when detecting CPU type). When that happens, + // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To + // avoid unnecessary crash when libjsig is not preloaded, try handle signals + // that do not require siginfo/ucontext first. 
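+  // Rough triage order implemented below (see the individual branches):
+  //  1. SIGPIPE is swallowed (needs no siginfo/ucontext).
+  //  2. SIGSEGV inside the thread stack -> yellow/red zone handling, or
+  //     manual stack expansion for MAP_GROWSDOWN stacks.
+  //  3. Faults while _thread_in_Java -> forward to the matching stub:
+  //     zombie-method SIGILL, polling-page SIGSEGV, SIGBUS from truncated
+  //     MappedByteBuffers, SIGFPE/trap for division by zero, implicit null
+  //     checks, and SIGILL emulation of paired-single FP instructions.
+  //  4. SIGILL raised by VM_Version feature detection, SIGBUS during unsafe
+  //     access in the VM, JNI fast-field slow cases, the memory-serialize
+  //     page, and execution-protection unguarding.
+  //  5. Otherwise: signal chaining, then report_and_die().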
+ + if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { + // allow chained handler to go first + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } else { + if (PrintMiscellaneous && (WizardMode || Verbose)) { + warning("Ignoring SIGPIPE - see bug 4229104"); + } + return true; + } + } + + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { + if (t != NULL ){ + if(t->is_Java_thread()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a java thread"); +#endif + thread = (JavaThread*)t; + } + else if(t->is_VM_thread()){ +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a VM thread\n"); +#endif + vmthread = (VMThread *)t; + } + } + } + + // decide if this trap can be handled by a stub + address stub = NULL; + address pc = NULL; + + pc = (address) os::Linux::ucontext_get_pc(uc); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("pc=%lx", pc); + os::print_context(tty, uc); +#endif + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Linux::ucontext_get_pc(uc); + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + address addr = (address) info->si_addr; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("handle all stack overflow variations: "); + /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", + addr, + thread->stack_base(), + thread->stack_base() - thread->stack_size()); + */ +#endif + + // check if fault address is within thread stack + if (addr < thread->stack_base() && + addr >= thread->stack_base() - thread->stack_size()) { + // stack overflow +#ifdef PRINT_SIGNAL_HANDLE + tty->print("stack exception check \n"); +#endif + if (thread->in_stack_yellow_zone(addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in yellow zone\n"); +#endif + thread->disable_stack_yellow_zone(); + if (thread->thread_state() == _thread_in_Java) { + // Throw a stack overflow exception. Guard pages will be reenabled + // while unwinding the stack. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in java\n"); +#endif + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); + } else { + // Thread was in the vm or native code. Return and try to finish. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in vm or native codes and return\n"); +#endif + return 1; + } + } else if (thread->in_stack_red_zone(addr)) { + // Fatal red zone violation. Disable the guard pages and fall through + // to handle_unexpected_exception way down below. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in red zone\n"); +#endif + thread->disable_stack_red_zone(); + tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + + // This is a likely cause, but hard to verify. Let's just print + // it as a hint. + tty->print_raw_cr("Please check if any of your loaded .so files has " + "enabled executable stack (see man page execstack(8))"); + } else { + // Accessing stack address below sp may cause SEGV if current + // thread has MAP_GROWSDOWN stack. This should only happen when + // current thread was created by user code with MAP_GROWSDOWN flag + // and then attached to VM. See notes in os_linux.cpp. 
+#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is neither in yellow zone nor in the red one\n"); +#endif + if (thread->osthread()->expanding_stack() == 0) { + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); + } else { + fatal("recursive segv. expanding stack."); + } + } + } //addr < + } //sig == SIGSEGV + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub +#ifdef PRINT_SIGNAL_HANDLE + tty->print("java thread running in java code\n"); +#endif + + // Handle signal from NativeJump::patch_verified_entry(). + if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); +#endif + stub = SharedRuntime::get_handle_wrong_method_stub(); + } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); +#endif + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault + // here if the underlying file has been truncated. + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("cb = %lx, nm = %lx\n", cb, nm); +#endif + if (nm != NULL && nm->has_unsafe_access()) { + stub = StubRoutines::handler_for_unsafe_access(); + } + } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { + // HACK: si_code does not work on linux 2.2.12-20!!! + int op = pc[0] & 0x3f; + int op1 = pc[3] & 0x3f; + //FIXME, Must port to mips code!! + switch (op) { + case 0x1e: //ddiv + case 0x1f: //ddivu + case 0x1a: //div + case 0x1b: //divu + case 0x34: //trap + /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. + * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() + */ + stub = SharedRuntime::continuation_for_implicit_exception(thread, + pc, + SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); + break; + default: + // TODO: handle more cases if we are using other x86 instructions + // that can generate SIGFPE signal on linux. + tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); + //fatal("please update this code."); + } + } else if (sig == SIGSEGV && + !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("continuation for implicit exception\n"); +#endif + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); +#endif + } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { + //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. + //The method is to trigger kernel emulation of float emulation. 
+ int inst = *(int*)pc; + int ops = (inst >> 26) & 0x3f; + int ops_fmt = (inst >> 21) & 0x1f; + int op = inst & 0x3f; + if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { + int ft, fs, fd; + ft = (inst >> 16) & 0x1f; + fs = (inst >> 11) & 0x1f; + fd = (inst >> 6) & 0x1f; + float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; + double ft_value, fs_value, fd_value; + ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; + fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; + __asm__ __volatile__ ( + "cvt.s.pl %0, %4\n\t" + "cvt.s.pu %1, %4\n\t" + "cvt.s.pl %2, %5\n\t" + "cvt.s.pu %3, %5\n\t" + : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) + : "f" (fs_value), "f" (ft_value) + ); + + switch (op) { + case Assembler::fadd_op: + __asm__ __volatile__ ( + "add.s %1, %3, %5\n\t" + "add.s %2, %4, %6\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) + : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + case Assembler::fsub_op: + //fd = fs - ft + __asm__ __volatile__ ( + "sub.s %1, %3, %5\n\t" + "sub.s %2, %4, %6\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) + : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + case Assembler::fmul_op: + __asm__ __volatile__ ( + "mul.s %1, %3, %5\n\t" + "mul.s %2, %4, %6\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) + : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + default: + tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); + } + } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { + // madd.ps is not used, the code below were not tested + int fr, ft, fs, fd; + float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; + double fr_value, ft_value, fs_value, fd_value; + switch (op) { + case Assembler::madd_ps_op: + // fd = (fs * ft) + fr + fr = (inst >> 21) & 0x1f; + ft = (inst >> 16) & 0x1f; + fs = (inst >> 11) & 0x1f; + fd = (inst >> 6) & 0x1f; + fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; + ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; + fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; + __asm__ __volatile__ ( + "cvt.s.pu %3, %9\n\t" + "cvt.s.pl %4, %9\n\t" + "cvt.s.pu %5, %10\n\t" + "cvt.s.pl %6, %10\n\t" + "cvt.s.pu %7, %11\n\t" + "cvt.s.pl %8, %11\n\t" + "madd.s %1, %3, %5, %7\n\t" + "madd.s %2, %4, %6, %8\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) + : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + default: + tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); + } + } + } //SIGILL + } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { + // thread->thread_state() != _thread_in_Java + // SIGILL must be caused by VM_Version::determine_features(). + VM_Version::set_supports_cpucfg(false); + stub = pc + 4; // continue with next instruction. 
+ } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("SIGBUS in vm thread \n"); +#endif + stub = StubRoutines::handler_for_unsafe_access(); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("jni fast get trap: "); +#endif + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; + } +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("addr = %d, stub = %lx", addr, stub); +#endif + } + + // Check to see if we caught the safepoint code in the + // process of write protecting the memory serialization page. + // It write enables the page immediately after protecting it + // so we can just return to retry the write. + if ((sig == SIGSEGV) && + os::is_memory_serialize_page(thread, (address) info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("write protecting the memory serialiazation page\n"); +#endif + // Block current thread until the memory serialize page permission restored. + os::block_on_serialize_page_trap(); + return true; + } + } + + // Execution protection violation + // + // This should be kept as the last step in the triage. We don't + // have a dedicated trap number for a no-execute fault, so be + // conservative and allow other handlers the first shot. + // + // Note: We don't test that info->si_code == SEGV_ACCERR here. + // this si_code is so generic that it is almost meaningless; and + // the si_code for this condition may change in the future. + // Furthermore, a false-positive should be harmless. + if (UnguardOnExecutionViolation > 0 && + //(sig == SIGSEGV || sig == SIGBUS) && + //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { + (sig == SIGSEGV || sig == SIGBUS +#ifdef OPT_RANGECHECK + || sig == SIGSYS +#endif + ) && + //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { + (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("execution protection violation\n"); +#endif + + int page_size = os::vm_page_size(); + address addr = (address) info->si_addr; + address pc = os::Linux::ucontext_get_pc(uc); + // Make sure the pc and the faulting address are sane. + // + // If an instruction spans a page boundary, and the page containing + // the beginning of the instruction is executable but the following + // page is not, the pc and the faulting address might be slightly + // different - we still want to unguard the 2nd page in this case. + // + // 15 bytes seems to be a (very) safe value for max instruction size. 
+ bool pc_is_near_addr = + (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); + bool instr_spans_page_boundary = + (align_size_down((intptr_t) pc ^ (intptr_t) addr, + (intptr_t) page_size) > 0); + + if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { + static volatile address last_addr = + (address) os::non_memory_address_word(); + + // In conservative mode, don't unguard unless the address is in the VM + if (addr != last_addr && + (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { + + // Set memory to RWX and retry + address page_start = + (address) align_size_down((intptr_t) addr, (intptr_t) page_size); + bool res = os::protect_memory((char*) page_start, page_size, + os::MEM_PROT_RWX); + + if (PrintMiscellaneous && Verbose) { + char buf[256]; + jio_snprintf(buf, sizeof(buf), "Execution protection violation " + "at " INTPTR_FORMAT + ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, + page_start, (res ? "success" : "failed"), errno); + tty->print_raw_cr(buf); + } + stub = pc; + + // Set last_addr so if we fault again at the same address, we don't end + // up in an endless loop. + // + // There are two potential complications here. Two threads trapping at + // the same address at the same time could cause one of the threads to + // think it already unguarded, and abort the VM. Likely very rare. + // + // The other race involves two threads alternately trapping at + // different addresses and failing to unguard the page, resulting in + // an endless loop. This condition is probably even more unlikely than + // the first. + // + // Although both cases could be avoided by using locks or thread local + // last_addr, these solutions are unnecessary complication: this + // handler is a best-effort safety net, not a complete solution. It is + // disabled by default and should only be used as a workaround in case + // we missed any no-execute-unsafe VM code. + + last_addr = addr; + } + } + } + + if (stub != NULL) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("resolved stub=%lx\n",stub); +#endif + // save all thread context in case we need to restore it + if (thread != NULL) thread->set_saved_exception_pc(pc); + + uc->uc_mcontext.pc = (greg_t)stub; + return true; + } + + // signal-chaining + if (os::Linux::chained_handler(sig, info, ucVoid)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("signal chaining\n"); +#endif + return true; + } + + if (!abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("abort becauce of unrecognized\n"); +#endif + // caller wants another chance, so give it to him + return false; + } + + if (pc == NULL && uc != NULL) { + pc = os::Linux::ucontext_get_pc(uc); + } + + // unmask current signal + sigset_t newset; + sigemptyset(&newset); + sigaddset(&newset, sig); + sigprocmask(SIG_UNBLOCK, &newset, NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("VMError in signal handler\n"); +#endif + VMError err(t, sig, pc, info, ucVoid); + err.report_and_die(); + + ShouldNotReachHere(); + return true; // Mute compiler +} + +// FCSR:...|24| 23 |22|21|... +// ...|FS|FCC0|FO|FN|... +void os::Linux::init_thread_fpu_state(void) { + if (SetFSFOFN == 999) + return; + int fs = (SetFSFOFN / 100)? 1:0; + int fo = ((SetFSFOFN % 100) / 10)? 1:0; + int fn = (SetFSFOFN % 10)? 
1:0; + int mask = fs << 24 | fo << 22 | fn << 21; + + int fcsr = get_fpu_control_word(); + fcsr = fcsr | mask; + set_fpu_control_word(fcsr); + /* + if (fcsr != get_fpu_control_word()) + tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); + */ +} + +int os::Linux::get_fpu_control_word(void) { + int fcsr; + __asm__ __volatile__ ( + ".set noat;" + "daddiu %0, $0, 0;" + "cfc1 %0, $31;" + : "=r" (fcsr) + ); + return fcsr; +} + +void os::Linux::set_fpu_control_word(int fpu_control) { + __asm__ __volatile__ ( + ".set noat;" + "ctc1 %0, $31;" + : + : "r" (fpu_control) + ); +} + +bool os::is_allocatable(size_t bytes) { + + if (bytes < 2 * G) { + return true; + } + + char* addr = reserve_memory(bytes, NULL); + + if (addr != NULL) { + release_memory(addr, bytes); + } + + return addr != NULL; +} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +size_t os::Linux::min_stack_allowed = 96 * K; + + +// Test if pthread library can support variable thread stack size. LinuxThreads +// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads +// in floating stack mode and NPTL support variable stack size. +bool os::Linux::supports_variable_stack_size() { + if (os::Linux::is_NPTL()) { + // NPTL, yes + return true; + + } else { + // Note: We can't control default stack size when creating a thread. + // If we use non-default stack size (pthread_attr_setstacksize), both + // floating stack and non-floating stack LinuxThreads will return the + // same value. This makes it impossible to implement this function by + // detecting thread stack size directly. + // + // An alternative approach is to check %gs. Fixed-stack LinuxThreads + // do not use %gs, so its value is 0. Floating-stack LinuxThreads use + // %gs (either as LDT selector or GDT selector, depending on kernel) + // to access thread specific data. + // + // Note that %gs is a reserved glibc register since early 2001, so + // applications are not allowed to change its value (Ulrich Drepper from + // Redhat confirmed that all known offenders have been modified to use + // either %fs or TSD). In the worst case scenario, when VM is embedded in + // a native application that plays with %gs, we might see non-zero %gs + // even LinuxThreads is running in fixed stack mode. As the result, we'll + // return true and skip _thread_safety_check(), so we may not be able to + // detect stack-heap collisions. But otherwise it's harmless. + // + return false; + } +} + +// return default stack size for thr_type +size_t os::Linux::default_stack_size(os::ThreadType thr_type) { + // default stack size (compiler thread needs larger stack) + size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); + return s; +} + +size_t os::Linux::default_guard_size(os::ThreadType thr_type) { + // Creating guard page is very expensive. Java thread has HotSpot + // guard page, only enable glibc guard page for non-Java threads. + return (thr_type == java_thread ? 0 : page_size()); +} + +// Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ JavaThread created by VM does not have glibc +// | glibc guard page | - guard, attached Java thread usually has +// | |/ 1 page glibc guard. 
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | HotSpot Guard Pages | - red and yellow pages +// | |/ +// +------------------------+ JavaThread::stack_yellow_zone_base() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// Non-Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ +// | glibc guard page | - usually 1 page +// | |/ +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from +// pthread_attr_getstack() + +static void current_stack_region(address * bottom, size_t * size) { + if (os::is_primordial_thread()) { + // primordial thread needs special handling because pthread_getattr_np() + // may return bogus value. + *bottom = os::Linux::initial_thread_stack_bottom(); + *size = os::Linux::initial_thread_stack_size(); + } else { + pthread_attr_t attr; + + int rslt = pthread_getattr_np(pthread_self(), &attr); + + // JVM needs to know exact stack location, abort if it fails + if (rslt != 0) { + if (rslt == ENOMEM) { + vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); + } else { + fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt)); + } + } + + if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { + fatal("Can not locate current stack attributes!"); + } + + pthread_attr_destroy(&attr); + + } + assert(os::current_stack_pointer() >= *bottom && + os::current_stack_pointer() < *bottom + *size, "just checking"); +} + +address os::current_stack_base() { + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return (bottom + size); +} + +size_t os::current_stack_size() { + // stack size includes normal stack and HotSpot guard pages + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return size; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler +void os::print_register_info(outputStream *st, void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + // this is horrendously verbose but the layout of the registers in the + // // context does not match how we defined our abstract Register set, so + // // we can't just iterate through the gregs area + // + // // this is only for the "general purpose" registers + st->print("R0=" ); print_location(st, uc->uc_mcontext.gregs[0]); + st->print("AT=" ); print_location(st, uc->uc_mcontext.gregs[1]); + st->print("V0=" ); print_location(st, uc->uc_mcontext.gregs[2]); + st->print("V1=" ); print_location(st, uc->uc_mcontext.gregs[3]); + st->cr(); + st->print("A0=" ); print_location(st, uc->uc_mcontext.gregs[4]); + st->print("A1=" ); print_location(st, uc->uc_mcontext.gregs[5]); + st->print("A2=" ); print_location(st, uc->uc_mcontext.gregs[6]); + st->print("A3=" ); print_location(st, uc->uc_mcontext.gregs[7]); + st->cr(); + st->print("A4=" ); print_location(st, uc->uc_mcontext.gregs[8]); + st->print("A5=" ); print_location(st, uc->uc_mcontext.gregs[9]); + st->print("A6=" ); print_location(st, uc->uc_mcontext.gregs[10]); + st->print("A7=" ); print_location(st, uc->uc_mcontext.gregs[11]); + st->cr(); + st->print("T0=" ); print_location(st, uc->uc_mcontext.gregs[12]); + 
st->print("T1=" ); print_location(st, uc->uc_mcontext.gregs[13]); + st->print("T2=" ); print_location(st, uc->uc_mcontext.gregs[14]); + st->print("T3=" ); print_location(st, uc->uc_mcontext.gregs[15]); + st->cr(); + st->print("S0=" ); print_location(st, uc->uc_mcontext.gregs[16]); + st->print("S1=" ); print_location(st, uc->uc_mcontext.gregs[17]); + st->print("S2=" ); print_location(st, uc->uc_mcontext.gregs[18]); + st->print("S3=" ); print_location(st, uc->uc_mcontext.gregs[19]); + st->cr(); + st->print("S4=" ); print_location(st, uc->uc_mcontext.gregs[20]); + st->print("S5=" ); print_location(st, uc->uc_mcontext.gregs[21]); + st->print("S6=" ); print_location(st, uc->uc_mcontext.gregs[22]); + st->print("S7=" ); print_location(st, uc->uc_mcontext.gregs[23]); + st->cr(); + st->print("T8=" ); print_location(st, uc->uc_mcontext.gregs[24]); + st->print("T9=" ); print_location(st, uc->uc_mcontext.gregs[25]); + st->print("K0=" ); print_location(st, uc->uc_mcontext.gregs[26]); + st->print("K1=" ); print_location(st, uc->uc_mcontext.gregs[27]); + st->cr(); + st->print("GP=" ); print_location(st, uc->uc_mcontext.gregs[28]); + st->print("SP=" ); print_location(st, uc->uc_mcontext.gregs[29]); + st->print("FP=" ); print_location(st, uc->uc_mcontext.gregs[30]); + st->print("RA=" ); print_location(st, uc->uc_mcontext.gregs[31]); + st->cr(); + +} +void os::print_context(outputStream *st, void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + st->print_cr("Registers:"); + st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); + st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); + st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); + st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); + st->cr(); + st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); + st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); + st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); + st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); + st->cr(); + st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); + st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); + st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); + st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); + st->cr(); + st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); + st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); + st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); + st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); + st->cr(); + st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); + st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); + st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); + st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); + st->cr(); + st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); + st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); + st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); + st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); + st->cr(); + st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); + st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); + st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); + 
st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); + st->cr(); + st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); + st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); + st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); + st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); + st->cr(); + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); + //print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t)); + print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. + address pc = os::Linux::ucontext_get_pc(uc); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); + Disassembler::decode(pc - 80, pc + 80, st); +} + +void os::setup_fpu() { + /* + //no use for MIPS + int fcsr; + address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); + __asm__ __volatile__ ( + ".set noat;" + "cfc1 %0, $31;" + "sw %0, 0(%1);" + : "=r" (fcsr) + : "r" (fpu_cntrl) + : "memory" + ); + printf("fpu_cntrl: %lx\n", fpu_cntrl); + */ +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif + +bool os::is_ActiveCoresMP() { + return UseActiveCoresMP && _initial_active_processor_count == 1; +} diff --git a/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp new file mode 100644 index 00000000000..c07d08156f2 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP + + static void setup_fpu(); + static bool is_allocatable(size_t bytes); + static intptr_t *get_previous_fp(); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations + static bool register_code_area(char *low, char *high) { return true; } + + static bool is_ActiveCoresMP(); + +#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp new file mode 100644 index 00000000000..93490345f0b --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP + + +inline void Prefetch::read (void *loc, intx interval) { + // 'pref' is implemented as NOP in Loongson 3A + __asm__ __volatile__ ( + " .set push\n" + " .set mips32\n" + " .set noreorder\n" + " pref 0, 0(%[__loc]) \n" + " .set pop\n" + : [__loc] "=&r"(loc) + : + : "memory" + ); +} + +inline void Prefetch::write(void *loc, intx interval) { + __asm__ __volatile__ ( + " .set push\n" + " .set mips32\n" + " .set noreorder\n" + " pref 1, 0(%[__loc]) \n" + " .set pop\n" + : [__loc] "=&r"(loc) + : + : "memory" + ); + +} + +#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp new file mode 100644 index 00000000000..be28a562a1e --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/threadLocalStorage.hpp" + +// Map stack pointer (%esp) to thread pointer for faster TLS access +// +// Here we use a flat table for better performance. Getting current thread +// is down to one memory access (read _sp_map[%esp>>12]) in generated code +// and two in runtime code (-fPIC code needs an extra load for _sp_map). +// +// This code assumes stack page is not shared by different threads. It works +// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters). +// +// Notice that _sp_map is allocated in the bss segment, which is ZFOD +// (zero-fill-on-demand). While it reserves 4M address space upfront, +// actual memory pages are committed on demand. +// +// If an application creates and destroys a lot of threads, usually the +// stack space freed by a thread will soon get reused by new thread +// (this is especially true in NPTL or LinuxThreads in fixed-stack mode). +// No memory page in _sp_map is wasted. +// +// However, it's still possible that we might end up populating & +// committing a large fraction of the 4M table over time, but the actual +// amount of live data in the table could be quite small. The max wastage +// is less than 4M bytes. If it becomes an issue, we could use madvise() +// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map. +// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the +// physical memory page (i.e. similar to MADV_FREE on Solaris). + +#ifdef MINIMIZE_RAM_USAGE +Thread* ThreadLocalStorage::_sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; +#endif // MINIMIZE_RAM_USAGE + +void ThreadLocalStorage::generate_code_for_get_thread() { + // nothing we can do here for user-level thread +} + +void ThreadLocalStorage::pd_init() { +#ifdef MINIMIZE_RAM_USAGE + assert(align_size_down(os::vm_page_size(), PAGE_SIZE) == os::vm_page_size(), + "page size must be multiple of PAGE_SIZE"); +#endif // MINIMIZE_RAM_USAGE +} + +void ThreadLocalStorage::pd_set_thread(Thread* thread) { + os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); +#ifdef MINIMIZE_RAM_USAGE + address stack_top = os::current_stack_base(); + size_t stack_size = os::current_stack_size(); + + for (address p = stack_top - stack_size; p < stack_top; p += PAGE_SIZE) { + int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); + assert(thread == NULL || _sp_map[index] == NULL || thread == _sp_map[index], + "thread exited without detaching from VM??"); + _sp_map[index] = thread; + } +#endif // MINIMIZE_RAM_USAGE +} diff --git a/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp new file mode 100644 index 00000000000..e595195e213 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP + +#ifdef MINIMIZE_RAM_USAGE + // Processor dependent parts of ThreadLocalStorage + //only the low 2G space for user program in Linux + + #define SP_BITLENGTH 34 + #define PAGE_SHIFT 14 + #define PAGE_SIZE (1UL << PAGE_SHIFT) + + static Thread* _sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; + static int _sp_map_low; + static int _sp_map_high; +#endif // MINIMIZE_RAM_USAGE + +public: +#ifdef MINIMIZE_RAM_USAGE + static Thread** sp_map_addr() { return _sp_map; } +#endif // MINIMIZE_RAM_USAGE + + static Thread* thread() { +#ifdef MINIMIZE_RAM_USAGE + /* Thread::thread() can also be optimized in the same way as __get_thread() */ + //return (Thread*) os::thread_local_storage_at(thread_index()); + uintptr_t sp; + uintptr_t mask = (1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1; + + __asm__ __volatile__ ("daddiu %0, $29, 0 " : "=r" (sp)); + + return _sp_map[(sp >> PAGE_SHIFT) & mask]; +#else + return (Thread*) os::thread_local_storage_at(thread_index()); +#endif // MINIMIZE_RAM_USAGE + } +#endif // OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp new file mode 100644 index 00000000000..44f666d61f3 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/sharedRuntime.hpp" + +void JavaThread::pd_initialize() +{ + _anchor.clear(); +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, + void* ucontext, bool isInJava) { + + assert(Thread::current() == this, "caller must be current thread"); + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + assert(this->is_Java_thread(), "must be JavaThread"); + JavaThread* jt = (JavaThread *)this; + + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. + if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { + *fr_addr = jt->pd_last_frame(); + return true; + } + + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. + if (isInJava) { + ucontext_t* uc = (ucontext_t*) ucontext; + + intptr_t* ret_fp; + intptr_t* ret_sp; + ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, + &ret_sp, &ret_fp); + if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(jt)) { +#ifdef COMPILER2 + // C2 uses ebp as a general register see if NULL fp helps + frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(jt)) { + // nothing else to try if the frame isn't good + return false; + } + ret_frame = ret_frame2; +#else + // nothing else to try if the frame isn't good + return false; +#endif /* COMPILER2 */ + } + *fr_addr = ret_frame; + return true; + } + + // nothing else to try + return false; +} + +void JavaThread::cache_global_variables() { } + diff --git a/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp new file mode 100644 index 00000000000..cb11c36ae50 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP + + private: + void pd_initialize(); + + frame pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + if (_anchor.last_Java_pc() != NULL) { + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); + } else { + // This will pick up pc from sp + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); + } + } + + + public: + // Mutators are highly dangerous.... + intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } + void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + + intptr_t* base_of_stack_pointer() { + return NULL; + } + void record_base_of_stack_pointer() { + } + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); +public: + + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). + static bool register_stack_overflow() { return false; } + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + +#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp new file mode 100644 index 00000000000..b7454bf045a --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, pid_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(pid_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp new file mode 100644 index 00000000000..ce697823b99 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
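VM_STRUCTS_OS_CPU and VM_TYPES_OS_CPU above follow the X-macro pattern used throughout vmStructs: the Serviceability Agent hands in one describer macro per kind of entry, and the list expands into one invocation per field or type. A small self-contained sketch of the pattern, with hypothetical names rather than the real HotSpot describer signatures:

#include <cstddef>
#include <cstdio>

struct OSThread { int _thread_id; unsigned long _pthread_id; };

// The list names the fields once; callers decide what each entry expands to.
#define MY_STRUCTS(nonstatic_field)      \
  nonstatic_field(OSThread, _thread_id)  \
  nonstatic_field(OSThread, _pthread_id)

// One possible expansion: print the byte offset of every listed field.
#define PRINT_OFFSET(klass, field) \
  std::printf(#klass "::" #field " at offset %zu\n", offsetof(klass, field));

int main() {
  MY_STRUCTS(PRINT_OFFSET)
  return 0;
}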
+ * + */ + +#include "precompiled.hpp" +#include "runtime/os.hpp" +#include "vm_version_mips.hpp" diff --git a/hotspot/src/share/tools/hsdis/Makefile b/hotspot/src/share/tools/hsdis/Makefile index 0d1b608944f..a9754ce2acf 100644 --- a/hotspot/src/share/tools/hsdis/Makefile +++ b/hotspot/src/share/tools/hsdis/Makefile @@ -105,12 +105,25 @@ CFLAGS/sparc += -m32 endif CFLAGS += $(CFLAGS/$(ARCH)) CFLAGS += -fPIC +ifeq ($(ARCH), mips64) +CPUINFO = $(shell cat /proc/cpuinfo) +ifneq ($(findstring Loongson,$(CPUINFO)),) +CFLAGS += -DLOONGSON +endif +endif OS = linux LIB_EXT = .so CC = gcc endif CFLAGS += -O DLDFLAGS += -shared +ifeq ($(ARCH), mips64) +DLDFLAGS += -Wl,-z,noexecstack +endif +ifeq ($(ARCH), loongarch64) +DLDFLAGS += -Wl,-z,noexecstack +CONFIGURE_ARGS += --disable-werror +endif LDFLAGS += -ldl OUTFLAGS += -o $@ else diff --git a/hotspot/src/share/tools/hsdis/hsdis.c b/hotspot/src/share/tools/hsdis/hsdis.c index 4fb49648704..f6ef5bea151 100644 --- a/hotspot/src/share/tools/hsdis/hsdis.c +++ b/hotspot/src/share/tools/hsdis/hsdis.c @@ -493,6 +493,16 @@ static const char* native_arch_name() { #if defined(LIBARCH_ppc64) || defined(LIBARCH_ppc64le) res = "powerpc:common64"; #endif +#ifdef LIBARCH_mips64 +#ifdef LOONGSON + res = "mips:loongson_3a"; +#else + res = "mips:isa64"; +#endif +#endif +#ifdef LIBARCH_loongarch64 + res = "loongarch"; +#endif #ifdef LIBARCH_aarch64 res = "aarch64"; #endif diff --git a/hotspot/src/share/vm/adlc/main.cpp b/hotspot/src/share/vm/adlc/main.cpp index 52044f12d40..50c585872ea 100644 --- a/hotspot/src/share/vm/adlc/main.cpp +++ b/hotspot/src/share/vm/adlc/main.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + // MAIN.CPP - Entry point for the Architecture Description Language Compiler #include "adlc.hpp" @@ -234,6 +240,14 @@ int main(int argc, char *argv[]) AD.addInclude(AD._CPP_file, "nativeInst_x86.hpp"); AD.addInclude(AD._CPP_file, "vmreg_x86.inline.hpp"); #endif +#ifdef TARGET_ARCH_mips + AD.addInclude(AD._CPP_file, "nativeInst_mips.hpp"); + AD.addInclude(AD._CPP_file, "vmreg_mips.inline.hpp"); +#endif +#ifdef TARGET_ARCH_loongarch + AD.addInclude(AD._CPP_file, "nativeInst_loongarch.hpp"); + AD.addInclude(AD._CPP_file, "vmreg_loongarch.inline.hpp"); +#endif #ifdef TARGET_ARCH_aarch64 AD.addInclude(AD._CPP_file, "assembler_aarch64.inline.hpp"); AD.addInclude(AD._CPP_file, "nativeInst_aarch64.hpp"); diff --git a/hotspot/src/share/vm/asm/assembler.hpp b/hotspot/src/share/vm/asm/assembler.hpp index f7f1ae1d367..572aa997cac 100644 --- a/hotspot/src/share/vm/asm/assembler.hpp +++ b/hotspot/src/share/vm/asm/assembler.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_ASM_ASSEMBLER_HPP #define SHARE_VM_ASM_ASSEMBLER_HPP @@ -53,6 +59,14 @@ # include "register_ppc.hpp" # include "vm_version_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "register_mips.hpp" +# include "vm_version_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "register_loongarch.hpp" +# include "vm_version_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "register_aarch64.hpp" # include "vm_version_aarch64.hpp" @@ -468,6 +482,12 @@ class AbstractAssembler : public ResourceObj { #ifdef TARGET_ARCH_ppc # include "assembler_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "assembler_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "assembler_loongarch.hpp" +#endif #endif // SHARE_VM_ASM_ASSEMBLER_HPP diff --git a/hotspot/src/share/vm/asm/assembler.inline.hpp b/hotspot/src/share/vm/asm/assembler.inline.hpp index 1a48cb3171d..8ac90e14740 100644 --- a/hotspot/src/share/vm/asm/assembler.inline.hpp +++ b/hotspot/src/share/vm/asm/assembler.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_ASM_ASSEMBLER_INLINE_HPP #define SHARE_VM_ASM_ASSEMBLER_INLINE_HPP @@ -42,6 +48,12 @@ #ifdef TARGET_ARCH_ppc # include "assembler_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "assembler_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "assembler_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "assembler_aarch64.inline.hpp" #endif diff --git a/hotspot/src/share/vm/asm/codeBuffer.cpp b/hotspot/src/share/vm/asm/codeBuffer.cpp index d94ac406555..f6b578111f3 100644 --- a/hotspot/src/share/vm/asm/codeBuffer.cpp +++ b/hotspot/src/share/vm/asm/codeBuffer.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2023. These + * modifications are Copyright (c) 2015, 2023, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "asm/codeBuffer.hpp" #include "compiler/disassembler.hpp" @@ -323,6 +329,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) assert(rtype == relocInfo::none || rtype == relocInfo::runtime_call_type || rtype == relocInfo::internal_word_type|| + NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) rtype == relocInfo::section_word_type || rtype == relocInfo::external_word_type, "code needs relocation information"); diff --git a/hotspot/src/share/vm/asm/codeBuffer.hpp b/hotspot/src/share/vm/asm/codeBuffer.hpp index 02b619ad77f..c04560a0bc1 100644 --- a/hotspot/src/share/vm/asm/codeBuffer.hpp +++ b/hotspot/src/share/vm/asm/codeBuffer.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_ASM_CODEBUFFER_HPP #define SHARE_VM_ASM_CODEBUFFER_HPP @@ -635,6 +641,12 @@ class CodeBuffer: public StackObj { #ifdef TARGET_ARCH_ppc # include "codeBuffer_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "codeBuffer_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "codeBuffer_loongarch.hpp" +#endif }; diff --git a/hotspot/src/share/vm/asm/macroAssembler.hpp b/hotspot/src/share/vm/asm/macroAssembler.hpp index 1482eb630b1..0be415b6c50 100644 --- a/hotspot/src/share/vm/asm/macroAssembler.hpp +++ b/hotspot/src/share/vm/asm/macroAssembler.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_ASM_MACROASSEMBLER_HPP #define SHARE_VM_ASM_MACROASSEMBLER_HPP @@ -45,5 +51,10 @@ #ifdef TARGET_ARCH_aarch64 # include "macroAssembler_aarch64.hpp" #endif - +#ifdef TARGET_ARCH_mips +# include "macroAssembler_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "macroAssembler_loongarch.hpp" +#endif #endif // SHARE_VM_ASM_MACROASSEMBLER_HPP diff --git a/hotspot/src/share/vm/asm/macroAssembler.inline.hpp b/hotspot/src/share/vm/asm/macroAssembler.inline.hpp index db3daa52e9a..6f4e523c595 100644 --- a/hotspot/src/share/vm/asm/macroAssembler.inline.hpp +++ b/hotspot/src/share/vm/asm/macroAssembler.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP #define SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP @@ -42,6 +48,12 @@ #ifdef TARGET_ARCH_ppc # include "macroAssembler_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "macroAssembler_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "macroAssembler_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "macroAssembler_aarch64.inline.hpp" #endif diff --git a/hotspot/src/share/vm/asm/register.hpp b/hotspot/src/share/vm/asm/register.hpp index c500890181a..6a20929e590 100644 --- a/hotspot/src/share/vm/asm/register.hpp +++ b/hotspot/src/share/vm/asm/register.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_ASM_REGISTER_HPP #define SHARE_VM_ASM_REGISTER_HPP @@ -108,6 +114,12 @@ const type name = ((type)name##_##type##EnumValue) #ifdef TARGET_ARCH_ppc # include "register_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "register_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "register_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "register_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_Defs.hpp b/hotspot/src/share/vm/c1/c1_Defs.hpp index b0cd7637399..b42b9de1b55 100644 --- a/hotspot/src/share/vm/c1/c1_Defs.hpp +++ b/hotspot/src/share/vm/c1/c1_Defs.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_C1_C1_DEFS_HPP #define SHARE_VM_C1_C1_DEFS_HPP @@ -29,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "register_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "register_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "register_aarch64.hpp" #endif @@ -56,6 +65,9 @@ enum { #ifdef TARGET_ARCH_x86 # include "c1_Defs_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_Defs_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_Defs_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp b/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp index f07e97a4d32..6bc367a8974 100644 --- a/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp +++ b/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_C1_C1_FPUSTACKSIM_HPP #define SHARE_VM_C1_C1_FPUSTACKSIM_HPP @@ -35,6 +41,9 @@ class FpuStackSim; #ifdef TARGET_ARCH_x86 # include "c1_FpuStackSim_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_FpuStackSim_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_FpuStackSim_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_FrameMap.cpp b/hotspot/src/share/vm/c1/c1_FrameMap.cpp index 1dac94d58cf..b1e37ec41c1 100644 --- a/hotspot/src/share/vm/c1/c1_FrameMap.cpp +++ b/hotspot/src/share/vm/c1/c1_FrameMap.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "c1/c1_FrameMap.hpp" #include "c1/c1_LIR.hpp" @@ -29,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "vmreg_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "vmreg_aarch64.inline.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_FrameMap.hpp b/hotspot/src/share/vm/c1/c1_FrameMap.hpp index 41571e3d168..c0e7b28ea47 100644 --- a/hotspot/src/share/vm/c1/c1_FrameMap.hpp +++ b/hotspot/src/share/vm/c1/c1_FrameMap.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_C1_C1_FRAMEMAP_HPP #define SHARE_VM_C1_C1_FRAMEMAP_HPP @@ -85,6 +91,9 @@ class FrameMap : public CompilationResourceObj { #ifdef TARGET_ARCH_x86 # include "c1_FrameMap_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_FrameMap_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_FrameMap_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_LIR.cpp b/hotspot/src/share/vm/c1/c1_LIR.cpp index fa37e7a046e..5d33d3f7a0f 100644 --- a/hotspot/src/share/vm/c1/c1_LIR.cpp +++ b/hotspot/src/share/vm/c1/c1_LIR.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "c1/c1_InstructionPrinter.hpp" #include "c1/c1_LIR.hpp" @@ -79,6 +85,17 @@ FloatRegister LIR_OprDesc::as_double_reg() const { #endif +#if defined(LOONGARCH64) + +FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + +FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +#endif LIR_Opr LIR_OprFact::illegalOpr = LIR_OprFact::illegal(); @@ -149,13 +166,19 @@ void LIR_Address::verify0() const { #endif #ifdef _LP64 assert(base()->is_cpu_register(), "wrong base operand"); -#ifndef AARCH64 +#if !defined(AARCH64) && !defined(LOONGARCH64) assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand"); #else assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); #endif +#ifdef LOONGARCH64 + assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || + base()->type() == T_LONG || base()->type() == T_METADATA, + "wrong type for addresses"); +#else assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, "wrong type for addresses"); +#endif #else assert(base()->is_single_cpu(), "wrong base operand"); assert(index()->is_illegal() || index()->is_single_cpu(), "wrong index operand"); @@ -258,8 +281,6 @@ bool LIR_OprDesc::is_oop() const { } } - - void LIR_Op2::verify() const { #ifdef ASSERT switch (code()) { @@ -301,6 +322,18 @@ void LIR_Op2::verify() const { #endif } +void LIR_Op4::verify() const { +#ifdef ASSERT + switch (code()) { + case lir_cmp_cmove: + break; + + default: + assert(!result_opr()->is_register() || !result_opr()->is_oop_register(), + "can't produce oops from arith"); + } +#endif +} LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) @@ -358,6 +391,55 @@ void LIR_OpBranch::negate_cond() { } } +LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info) + : LIR_Op2(lir_cmp_branch, cond, left, right, info) + , _label(stub->entry()) + , _block(NULL) + , _ublock(NULL) + , _stub(stub) { +} + +LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info) + : LIR_Op2(lir_cmp_branch, cond, left, right, info) + , _label(block->label()) + , _block(block) + , _ublock(NULL) + , _stub(NULL) { +} + +LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info) + : LIR_Op2(lir_cmp_float_branch, cond, left, right, info) + , _label(block->label()) + , _block(block) + , _ublock(ublock) + , _stub(NULL) { +} + +void LIR_OpCmpBranch::change_block(BlockBegin* b) { + assert(_block != NULL, "must have old block"); + assert(_block->label() == label(), "must be equal"); + + _block = b; + _label = b->label(); +} + +void LIR_OpCmpBranch::change_ublock(BlockBegin* b) { + assert(_ublock != NULL, "must have old block"); + + _ublock = b; +} + +void LIR_OpCmpBranch::negate_cond() { + switch (condition()) { + case lir_cond_equal: set_condition(lir_cond_notEqual); break; + case lir_cond_notEqual: set_condition(lir_cond_equal); break; + case lir_cond_less: set_condition(lir_cond_greaterEqual); break; + case lir_cond_lessEqual: set_condition(lir_cond_greater); break; + case lir_cond_greaterEqual: set_condition(lir_cond_less); break; + case lir_cond_greater: set_condition(lir_cond_lessEqual); 
break; + default: ShouldNotReachHere(); + } +} LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, @@ -560,10 +642,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { assert(opConvert->_info == NULL, "must be"); if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); if (opConvert->_result->is_valid()) do_output(opConvert->_result); -#if defined(PPC) || defined(AARCH64) - if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); - if (opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); -#endif + if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); do_stub(opConvert->_stub); break; @@ -661,6 +740,25 @@ void LIR_OpVisitState::visit(LIR_Op* op) { break; } +// LIR_OpCmpBranch; + case lir_cmp_branch: // may have info, input and result register always invalid + case lir_cmp_float_branch: // may have info, input and result register always invalid + { + assert(op->as_OpCmpBranch() != NULL, "must be"); + LIR_OpCmpBranch* opCmpBranch = (LIR_OpCmpBranch*)op; + assert(opCmpBranch->_tmp2->is_illegal() && opCmpBranch->_tmp3->is_illegal() && + opCmpBranch->_tmp4->is_illegal() && opCmpBranch->_tmp5->is_illegal(), "not used"); + + if (opCmpBranch->_info) do_info(opCmpBranch->_info); + if (opCmpBranch->_opr1->is_valid()) do_input(opCmpBranch->_opr1); + if (opCmpBranch->_opr2->is_valid()) do_input(opCmpBranch->_opr2); + if (opCmpBranch->_tmp1->is_valid()) do_temp(opCmpBranch->_tmp1); + if (opCmpBranch->_stub != NULL) opCmpBranch->stub()->visit(this); + assert(opCmpBranch->_result->is_illegal(), "not used"); + + break; + } + // special handling for cmove: right input operand must not be equal // to the result operand, otherwise the backend fails case lir_cmove: @@ -806,6 +904,29 @@ void LIR_OpVisitState::visit(LIR_Op* op) { break; } +// LIR_Op4 + // special handling for cmp cmove: src2(opr4) operand must not be equal + // to the result operand, otherwise the backend fails + case lir_cmp_cmove: + { + assert(op->as_Op4() != NULL, "must be"); + LIR_Op4* op4 = (LIR_Op4*)op; + + assert(op4->_info == NULL, "not used"); + assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && + op4->_opr3->is_valid() && op4->_opr4->is_valid() && + op4->_result->is_valid(), "used"); + + do_input(op4->_opr1); + do_input(op4->_opr2); + do_input(op4->_opr3); + do_input(op4->_opr4); + do_temp(op4->_opr4); + do_output(op4->_result); + + break; + } + // LIR_OpJavaCall case lir_static_call: @@ -1121,6 +1242,13 @@ void LIR_Op2::emit_code(LIR_Assembler* masm) { masm->emit_op2(this); } +void LIR_OpCmpBranch::emit_code(LIR_Assembler* masm) { + masm->emit_opCmpBranch(this); + if (stub()) { + masm->append_code_stub(stub()); + } +} + void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { masm->emit_alloc_array(this); masm->append_code_stub(stub()); @@ -1141,6 +1269,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { masm->emit_op3(this); } +void LIR_Op4::emit_code(LIR_Assembler* masm) { + masm->emit_op4(this); +} + void LIR_OpLock::emit_code(LIR_Assembler* masm) { masm->emit_lock(this); if (stub()) { @@ -1381,7 +1513,6 @@ void LIR_List::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int info)); } - void LIR_List::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info) { append(new LIR_Op2( lir_cmp, @@ -1391,6 +1522,17 @@ void LIR_List::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* ad info)); } +void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_null) { + 
if (deoptimize_on_null) { + // Emit an explicit null check and deoptimize if opr is null + CodeStub* deopt = new DeoptimizeStub(info); + cmp_branch(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL), T_OBJECT, deopt); + } else { + // Emit an implicit null check + append(new LIR_Op1(lir_null_check, opr, info)); + } +} + void LIR_List::allocate_object(LIR_Opr dst, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4, int header_size, int object_size, LIR_Opr klass, bool init_check, CodeStub* stub) { append(new LIR_OpAllocObj( @@ -1520,18 +1662,6 @@ void LIR_List::store_check(LIR_Opr object, LIR_Opr array, LIR_Opr tmp1, LIR_Opr append(c); } -void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_null) { - if (deoptimize_on_null) { - // Emit an explicit null check and deoptimize if opr is null - CodeStub* deopt = new DeoptimizeStub(info); - cmp(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL)); - branch(lir_cond_equal, T_OBJECT, deopt); - } else { - // Emit an implicit null check - append(new LIR_Op1(lir_null_check, opr, info)); - } -} - void LIR_List::cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, LIR_Opr t1, LIR_Opr t2, LIR_Opr result) { append(new LIR_OpCompareAndSwap(lir_cas_long, addr, cmp_value, new_value, t1, t2, result)); @@ -1780,6 +1910,8 @@ const char * LIR_Op::name() const { case lir_cmp_l2i: s = "cmp_l2i"; break; case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; case lir_cmp_fd2i: s = "comp_fd2i"; break; + case lir_cmp_branch: s = "cmp_branch"; break; + case lir_cmp_float_branch: s = "cmp_fbranch"; break; case lir_cmove: s = "cmove"; break; case lir_add: s = "add"; break; case lir_sub: s = "sub"; break; @@ -1809,6 +1941,8 @@ const char * LIR_Op::name() const { // LIR_Op3 case lir_idiv: s = "idiv"; break; case lir_irem: s = "irem"; break; + // LIR_Op4 + case lir_cmp_cmove: s = "cmp_cmove"; break; // LIR_OpJavaCall case lir_static_call: s = "static"; break; case lir_optvirtual_call: s = "optvirtual"; break; @@ -1960,6 +2094,26 @@ void LIR_OpBranch::print_instr(outputStream* out) const { } } +// LIR_OpCmpBranch +void LIR_OpCmpBranch::print_instr(outputStream* out) const { + print_condition(out, condition()); out->print(" "); + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + if (block() != NULL) { + out->print("[B%d] ", block()->block_id()); + } else if (stub() != NULL) { + out->print("["); + stub()->print_name(out); + out->print(": " INTPTR_FORMAT "]", p2i(stub())); + if (stub()->info() != NULL) out->print(" [bci:%d]", stub()->info()->stack()->bci()); + } else { + out->print("[label:" INTPTR_FORMAT "] ", p2i(label())); + } + if (ublock() != NULL) { + out->print("unordered: [B%d] ", ublock()->block_id()); + } +} + void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { switch(cond) { case lir_cond_equal: out->print("[EQ]"); break; @@ -1980,12 +2134,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { print_bytecode(out, bytecode()); in_opr()->print(out); out->print(" "); result_opr()->print(out); out->print(" "); -#if defined(PPC) || defined(AARCH64) - if(tmp1()->is_valid()) { - tmp1()->print(out); out->print(" "); - tmp2()->print(out); out->print(" "); + if(tmp()->is_valid()) { + tmp()->print(out); out->print(" "); } -#endif } void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { @@ -2031,9 +2182,6 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { // LIR_Op2 void LIR_Op2::print_instr(outputStream* out) const { - if (code() == lir_cmove) { - print_condition(out, 
condition()); out->print(" "); - } in_opr1()->print(out); out->print(" "); in_opr2()->print(out); out->print(" "); if (tmp1_opr()->is_valid()) { tmp1_opr()->print(out); out->print(" "); } @@ -2082,6 +2230,18 @@ void LIR_Op3::print_instr(outputStream* out) const { result_opr()->print(out); } +// LIR_Op4 +void LIR_Op4::print_instr(outputStream* out) const { + if (code() == lir_cmp_cmove) { + print_condition(out, condition()); out->print(" "); + } + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + in_opr3()->print(out); out->print(" "); + in_opr4()->print(out); out->print(" "); + result_opr()->print(out); +} + void LIR_OpLock::print_instr(outputStream* out) const { hdr_opr()->print(out); out->print(" "); @@ -2095,10 +2255,14 @@ void LIR_OpLock::print_instr(outputStream* out) const { #ifdef ASSERT void LIR_OpAssert::print_instr(outputStream* out) const { + tty->print_cr("function LIR_OpAssert::print_instr unimplemented yet! "); + Unimplemented(); + /* print_condition(out, condition()); out->print(" "); in_opr1()->print(out); out->print(" "); in_opr2()->print(out); out->print(", \""); out->print("%s", msg()); out->print("\""); + */ } #endif diff --git a/hotspot/src/share/vm/c1/c1_LIR.hpp b/hotspot/src/share/vm/c1/c1_LIR.hpp index 24b86202111..aec77afe1f8 100644 --- a/hotspot/src/share/vm/c1/c1_LIR.hpp +++ b/hotspot/src/share/vm/c1/c1_LIR.hpp @@ -22,6 +22,11 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ #ifndef SHARE_VM_C1_C1_LIR_HPP #define SHARE_VM_C1_C1_LIR_HPP @@ -452,7 +457,7 @@ class LIR_OprDesc: public CompilationResourceObj { // for compatibility with RInfo int fpu () const { return lo_reg_half(); } #endif -#if defined(SPARC) || defined(ARM) || defined(PPC) || defined(AARCH64) +#if defined(SPARC) || defined(ARM) || defined(PPC) || defined(AARCH64) || defined(LOONGARCH) FloatRegister as_float_reg () const; FloatRegister as_double_reg () const; #endif @@ -542,7 +547,7 @@ class LIR_Address: public LIR_OprPtr { , _type(type) , _disp(0) { verify(); } -#if defined(X86) || defined(ARM) || defined(AARCH64) +#if defined(X86) || defined(ARM) || defined(AARCH64) || defined(LOONGARCH) LIR_Address(LIR_Opr base, LIR_Opr index, Scale scale, intx disp, BasicType type): _base(base) , _index(index) @@ -658,7 +663,13 @@ class LIR_OprFact: public AllStatic { LIR_OprDesc::double_type | LIR_OprDesc::cpu_register | LIR_OprDesc::double_size); } -#endif // PPC +#elif defined(LOONGARCH) + static LIR_Opr double_fpu(int reg) { return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | + (reg << LIR_OprDesc::reg2_shift) | + LIR_OprDesc::double_type | + LIR_OprDesc::fpu_register | + LIR_OprDesc::double_size); } +#endif // LOONGARCH static LIR_Opr virtual_register(int index, BasicType type) { LIR_Opr res; @@ -872,9 +883,11 @@ class LIR_OpConvert; class LIR_OpAllocObj; class LIR_OpRoundFP; class LIR_Op2; +class LIR_OpCmpBranch; class LIR_OpDelay; class LIR_Op3; class LIR_OpAllocArray; +class LIR_Op4; class LIR_OpCall; class LIR_OpJavaCall; class LIR_OpRTCall; @@ -943,6 +956,8 @@ enum LIR_Code { , lir_cmp_l2i , lir_ucmp_fd2i , lir_cmp_fd2i + , lir_cmp_branch + , lir_cmp_float_branch , lir_cmove , lir_add , lir_sub @@ -976,6 +991,9 @@ enum LIR_Code { , lir_idiv , lir_irem , end_op3 + , begin_op4 + , lir_cmp_cmove + , end_op4 , begin_opJavaCall , lir_static_call , lir_optvirtual_call @@ -1139,12 +1157,14 @@ 
class LIR_Op: public CompilationResourceObj { virtual LIR_OpAllocObj* as_OpAllocObj() { return NULL; } virtual LIR_OpRoundFP* as_OpRoundFP() { return NULL; } virtual LIR_OpBranch* as_OpBranch() { return NULL; } + virtual LIR_OpCmpBranch* as_OpCmpBranch() { return NULL; } virtual LIR_OpRTCall* as_OpRTCall() { return NULL; } virtual LIR_OpConvert* as_OpConvert() { return NULL; } virtual LIR_Op0* as_Op0() { return NULL; } virtual LIR_Op1* as_Op1() { return NULL; } virtual LIR_Op2* as_Op2() { return NULL; } virtual LIR_Op3* as_Op3() { return NULL; } + virtual LIR_Op4* as_Op4() { return NULL; } virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } @@ -1474,37 +1494,18 @@ class LIR_OpConvert: public LIR_Op1 { private: Bytecodes::Code _bytecode; ConversionStub* _stub; -#if defined(PPC) || defined(AARCH64) - LIR_Opr _tmp1; - LIR_Opr _tmp2; -#endif + LIR_Opr _tmp; public: - LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) + LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) : LIR_Op1(lir_convert, opr, result) , _stub(stub) -#ifdef PPC - , _tmp1(LIR_OprDesc::illegalOpr()) - , _tmp2(LIR_OprDesc::illegalOpr()) -#endif + , _tmp(tmp) , _bytecode(code) {} -#if defined(PPC) || defined(AARCH64) - LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub - ,LIR_Opr tmp1, LIR_Opr tmp2) - : LIR_Op1(lir_convert, opr, result) - , _stub(stub) - , _tmp1(tmp1) - , _tmp2(tmp2) - , _bytecode(code) {} -#endif - Bytecodes::Code bytecode() const { return _bytecode; } ConversionStub* stub() const { return _stub; } -#if defined(PPC) || defined(AARCH64) - LIR_Opr tmp1() const { return _tmp1; } - LIR_Opr tmp2() const { return _tmp2; } -#endif + LIR_Opr tmp() const { return _tmp; } virtual void emit_code(LIR_Assembler* masm); virtual LIR_OpConvert* as_OpConvert() { return this; } @@ -1659,7 +1660,7 @@ class LIR_Op2: public LIR_Op { , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) { - assert(code == lir_cmp || code == lir_assert, "code check"); + assert(code == lir_cmp || code == lir_cmp_branch || code == lir_cmp_float_branch || code == lir_assert, "code check"); } LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) @@ -1691,7 +1692,7 @@ class LIR_Op2: public LIR_Op { , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) { - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, @@ -1707,7 +1708,7 @@ class LIR_Op2: public LIR_Op { , _tmp3(tmp3) , _tmp4(tmp4) , _tmp5(tmp5) { - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Opr in_opr1() const { return _opr1; } @@ -1719,10 +1720,12 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } LIR_Condition condition() const { - assert(code() == lir_cmp || code() == 
lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; + assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); + return _condition; } void set_condition(LIR_Condition condition) { - assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; + assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove, "only valid for cmp and cmove"); + _condition = condition; } void set_fpu_stack_size(int size) { _fpu_stack_size = size; } @@ -1736,6 +1739,43 @@ class LIR_Op2: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; +class LIR_OpCmpBranch: public LIR_Op2 { + friend class LIR_OpVisitState; + + private: + Label* _label; + BlockBegin* _block; // if this is a branch to a block, this is the block + BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block + CodeStub* _stub; // if this is a branch to a stub, this is the stub + + public: + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl, CodeEmitInfo* info = NULL) + : LIR_Op2(lir_cmp_branch, cond, left, right, info) + , _label(lbl) + , _block(NULL) + , _ublock(NULL) + , _stub(NULL) { } + + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info = NULL); + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info = NULL); + + // for unordered comparisons + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info = NULL); + + Label* label() const { return _label; } + BlockBegin* block() const { return _block; } + BlockBegin* ublock() const { return _ublock; } + CodeStub* stub() const { return _stub; } + + void change_block(BlockBegin* b); + void change_ublock(BlockBegin* b); + void negate_cond(); + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpCmpBranch* as_OpCmpBranch() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + class LIR_OpAllocArray : public LIR_Op { friend class LIR_OpVisitState; @@ -1776,7 +1816,6 @@ class LIR_OpAllocArray : public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; - class LIR_Op3: public LIR_Op { friend class LIR_OpVisitState; @@ -1800,6 +1839,48 @@ class LIR_Op3: public LIR_Op { }; +class LIR_Op4: public LIR_Op { + friend class LIR_OpVisitState; + + private: + LIR_Opr _opr1; + LIR_Opr _opr2; + LIR_Opr _opr3; + LIR_Opr _opr4; + BasicType _type; + LIR_Condition _condition; + + void verify() const; + + public: + LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, LIR_Opr result, BasicType type) + : LIR_Op(code, result, NULL) + , _opr1(opr1) + , _opr2(opr2) + , _opr3(opr3) + , _opr4(opr4) + , _type(type) + , _condition(condition) { + assert(is_in_range(code, begin_op4, end_op4), "code check"); + assert(type != T_ILLEGAL, "cmove should have type"); + } + LIR_Opr in_opr1() const { return _opr1; } + LIR_Opr in_opr2() const { return _opr2; } + LIR_Opr in_opr3() const { return _opr3; } + LIR_Opr in_opr4() const { return _opr4; } + BasicType type() const { return _type; } + LIR_Condition condition() const { + assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); return _condition; + } + void 
set_condition(LIR_Condition condition) { + assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); _condition = condition; + } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_Op4* as_Op4() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + //-------------------------------- class LabelObj: public CompilationResourceObj { private: @@ -2141,17 +2222,9 @@ class LIR_List: public CompilationResourceObj { void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } -#ifdef PPC - void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_OpConvert(code, left, dst, NULL, tmp1, tmp2)); } -#endif -#if defined(AARCH64) - void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, - ConversionStub* stub = NULL, LIR_Opr tmp1 = LIR_OprDesc::illegalOpr()) { - append(new LIR_OpConvert(code, left, dst, stub, tmp1, LIR_OprDesc::illegalOpr())); + void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { + append(new LIR_OpConvert(code, left, dst, stub, tmp)); } -#else - void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } -#endif void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } @@ -2256,6 +2329,48 @@ class LIR_List: public CompilationResourceObj { append(new LIR_OpBranch(cond, type, block, unordered)); } +#if defined(X86) || defined(AARCH64) + + template + void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { + cmp(condition, left, right, info); + branch(condition, type, tgt); + } + + void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { + cmp(condition, left, right); + branch(condition, type, block, unordered); + } + + void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + cmp(condition, left, right); + cmove(condition, src1, src2, dst, type); + } + +#endif + +#ifdef LOONGARCH + + template + void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { + append(new LIR_OpCmpBranch(condition, left, right, tgt, info)); + } + + void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { + append(new LIR_OpCmpBranch(condition, left, right, block, unordered)); + } + + void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + append(new LIR_Op4(lir_cmp_cmove, condition, left, right, src1, src2, dst, type)); + } + +#endif + + template + void cmp_branch(LIR_Condition condition, LIR_Opr left, int right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { + cmp_branch(condition, left, LIR_OprFact::intConst(right), type, tgt, info); + } + void shift_left(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); void shift_right(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); void unsigned_shift_right(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); diff --git 
a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp index e5cd19f17a7..a18c53008bd 100644 --- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "c1/c1_Compilation.hpp" #include "c1/c1_Instruction.hpp" @@ -34,6 +40,10 @@ # include "nativeInst_x86.hpp" # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +# include "vmreg_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "nativeInst_aarch64.hpp" # include "vmreg_aarch64.inline.hpp" @@ -811,6 +821,18 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { } +void LIR_Assembler::emit_op4(LIR_Op4* op) { + switch (op->code()) { + case lir_cmp_cmove: + cmp_cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->in_opr3(), op->in_opr4(), op->result_opr(), op->type()); + break; + + default: + Unimplemented(); + break; + } +} + void LIR_Assembler::build_frame() { _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); } diff --git a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp index 1a68d458d23..ac0f4e7a462 100644 --- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp +++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_C1_C1_LIRASSEMBLER_HPP #define SHARE_VM_C1_C1_LIRASSEMBLER_HPP @@ -195,7 +201,9 @@ class LIR_Assembler: public CompilationResourceObj { void emit_op1(LIR_Op1* op); void emit_op2(LIR_Op2* op); void emit_op3(LIR_Op3* op); + void emit_op4(LIR_Op4* op); void emit_opBranch(LIR_OpBranch* op); + void emit_opCmpBranch(LIR_OpCmpBranch* op); void emit_opLabel(LIR_OpLabel* op); void emit_arraycopy(LIR_OpArrayCopy* op); void emit_updatecrc32(LIR_OpUpdateCRC32* op); @@ -227,6 +235,7 @@ class LIR_Assembler: public CompilationResourceObj { void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); + void cmp_cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type); void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); @@ -265,6 +274,9 @@ class LIR_Assembler: public CompilationResourceObj { #ifdef TARGET_ARCH_x86 # include "c1_LIRAssembler_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_LIRAssembler_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_LIRAssembler_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp index e98834d03a5..d1a987c6991 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. 
These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "c1/c1_Defs.hpp" #include "c1/c1_Compilation.hpp" @@ -483,13 +489,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { CodeStub* stub = new RangeCheckStub(range_check_info, index); if (index->is_constant()) { - cmp_mem_int(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), - index->as_jint(), null_check_info); - __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch + cmp_mem_int_branch(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), + index->as_jint(), stub, null_check_info); // forward branch } else { - cmp_reg_mem(lir_cond_aboveEqual, index, array, - arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); - __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch + cmp_reg_mem_branch(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), + T_INT, stub, null_check_info); // forward branch } } @@ -497,12 +501,10 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { CodeStub* stub = new RangeCheckStub(info, index, true); if (index->is_constant()) { - cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); - __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch + cmp_mem_int_branch(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), stub, info); // forward branch } else { - cmp_reg_mem(lir_cond_aboveEqual, index, buffer, - java_nio_Buffer::limit_offset(), T_INT, info); - __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch + cmp_reg_mem_branch(lir_cond_aboveEqual, index, buffer, + java_nio_Buffer::limit_offset(), T_INT, stub, info); // forward branch } __ move(index, result); } @@ -935,7 +937,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { return tmp; } -void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { +void LIRGenerator::profile_branch(If* if_instr, If::Condition cond, LIR_Opr left, LIR_Opr right) { if (if_instr->should_profile()) { ciMethod* method = if_instr->profiled_method(); assert(method != NULL, "method should be set if branch is profiled"); @@ -956,10 +958,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { __ metadata2reg(md->constant_encoding(), md_reg); LIR_Opr data_offset_reg = new_pointer_register(); - __ cmove(lir_cond(cond), - LIR_OprFact::intptrConst(taken_count_offset), - LIR_OprFact::intptrConst(not_taken_count_offset), - data_offset_reg, as_BasicType(if_instr->x()->type())); + if (left == LIR_OprFact::illegalOpr && right == LIR_OprFact::illegalOpr) { + __ cmove(lir_cond(cond), + LIR_OprFact::intptrConst(taken_count_offset), + LIR_OprFact::intptrConst(not_taken_count_offset), + data_offset_reg, as_BasicType(if_instr->x()->type())); + } else { + __ cmp_cmove(lir_cond(cond), left, right, + LIR_OprFact::intptrConst(taken_count_offset), + LIR_OprFact::intptrConst(not_taken_count_offset), + data_offset_reg, as_BasicType(if_instr->x()->type())); + } // MDO cells are intptr_t, so the data_reg width is arch-dependent. 
LIR_Opr data_reg = new_pointer_register(); @@ -1306,8 +1315,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { } __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); - __ cmp(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0)); - __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); + __ cmp_cmove(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0), + LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); } // Example: Thread.currentThread() @@ -1500,7 +1509,6 @@ void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr p // Read the marking-in-progress flag. LIR_Opr flag_val = new_register(T_INT); __ load(mark_active_flag_addr, flag_val); - __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); LIR_PatchCode pre_val_patch_code = lir_patch_none; @@ -1529,7 +1537,7 @@ void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr p slow = new G1PreBarrierStub(pre_val); } - __ branch(lir_cond_notEqual, T_INT, slow); + __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); __ branch_destination(slow->continuation()); } @@ -1587,10 +1595,8 @@ void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_Opr } assert(new_val->is_register(), "must be a register at this point"); - __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); - CodeStub* slow = new G1PostBarrierStub(addr, new_val); - __ branch(lir_cond_notEqual, LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); + __ cmp_branch(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD), T_INT, slow); __ branch_destination(slow->continuation()); } @@ -1860,12 +1866,10 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { CodeEmitInfo* info = state_for(x); CodeStub* stub = new RangeCheckStub(info, index.result(), true); if (index.result()->is_constant()) { - cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); - __ branch(lir_cond_belowEqual, T_INT, stub); + cmp_mem_int_branch(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), stub, info); } else { - cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), - java_nio_Buffer::limit_offset(), T_INT, info); - __ branch(lir_cond_aboveEqual, T_INT, stub); + cmp_reg_mem_branch(lir_cond_aboveEqual, index.result(), buf.result(), + java_nio_Buffer::limit_offset(), T_INT, stub, info); } __ move(index.result(), result); } else { @@ -1946,8 +1950,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { } else if (use_length) { // TODO: use a (modified) version of array_range_check that does not require a // constant length to be loaded to a register - __ cmp(lir_cond_belowEqual, length.result(), index.result()); - __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result())); + CodeStub* stub = new RangeCheckStub(range_check_info, index.result()); + __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); } else { array_range_check(array.result(), index.result(), null_check_info, range_check_info); // The range check performs the null check, so clear it out for the load @@ -2129,7 +2133,7 @@ void LIRGenerator::do_UnsafeGetRaw(UnsafeGetRaw* x) { assert(index_op->type() == T_INT, "only int constants supported"); addr = new LIR_Address(base_op, index_op->as_jint(), dst_type); } else { -#if defined(X86) 
|| defined(AARCH64) +#if defined(X86) || defined(AARCH64) || defined(LOONGARCH) addr = new LIR_Address(base_op, index_op, LIR_Address::Scale(log2_scale), 0, dst_type); #elif defined(GENERATE_ADDRESS_IS_PREFERRED) addr = generate_address(base_op, index_op, log2_scale, 0, dst_type); @@ -2344,19 +2348,18 @@ void LIRGenerator::do_UnsafeGetObject(UnsafeGetObject* x) { if (off.type()->is_int()) { referent_off = LIR_OprFact::intConst(java_lang_ref_Reference::referent_offset); + __ cmp_branch(lir_cond_notEqual, off.result(), referent_off, T_INT, Lcont->label()); } else { assert(off.type()->is_long(), "what else?"); referent_off = new_register(T_LONG); __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off); + __ cmp_branch(lir_cond_notEqual, off.result(), referent_off, T_LONG, Lcont->label()); } - __ cmp(lir_cond_notEqual, off.result(), referent_off); - __ branch(lir_cond_notEqual, as_BasicType(off.type()), Lcont->label()); } if (gen_source_check) { // offset is a const and equals referent offset // if (source == null) -> continue - __ cmp(lir_cond_equal, src_reg, LIR_OprFact::oopConst(NULL)); - __ branch(lir_cond_equal, T_OBJECT, Lcont->label()); + __ cmp_branch(lir_cond_equal, src_reg, LIR_OprFact::oopConst(NULL), T_OBJECT, Lcont->label()); } LIR_Opr src_klass = new_register(T_METADATA); if (gen_type_check) { @@ -2366,8 +2369,7 @@ void LIRGenerator::do_UnsafeGetObject(UnsafeGetObject* x) { LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(InstanceKlass::reference_type_offset()), T_BYTE); LIR_Opr reference_type = new_register(T_INT); __ move(reference_type_addr, reference_type); - __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE)); - __ branch(lir_cond_equal, T_INT, Lcont->label()); + __ cmp_branch(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE), T_INT, Lcont->label()); } { // We have determined that src->_klass->_reference_type != REF_NONE @@ -2447,19 +2449,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi int high_key = one_range->high_key(); BlockBegin* dest = one_range->sux(); if (low_key == high_key) { - __ cmp(lir_cond_equal, value, low_key); - __ branch(lir_cond_equal, T_INT, dest); + __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); } else if (high_key - low_key == 1) { - __ cmp(lir_cond_equal, value, low_key); - __ branch(lir_cond_equal, T_INT, dest); - __ cmp(lir_cond_equal, value, high_key); - __ branch(lir_cond_equal, T_INT, dest); + __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); + __ cmp_branch(lir_cond_equal, value, high_key, T_INT, dest); } else { LabelObj* L = new LabelObj(); - __ cmp(lir_cond_less, value, low_key); - __ branch(lir_cond_less, T_INT, L->label()); - __ cmp(lir_cond_lessEqual, value, high_key); - __ branch(lir_cond_lessEqual, T_INT, dest); + __ cmp_branch(lir_cond_less, value, low_key, T_INT, L->label()); + __ cmp_branch(lir_cond_lessEqual, value, high_key, T_INT, dest); __ branch_destination(L->label()); } } @@ -2546,8 +2543,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); } else { for (int i = 0; i < len; i++) { - __ cmp(lir_cond_equal, value, i + lo_key); - __ branch(lir_cond_equal, T_INT, x->sux_at(i)); + __ cmp_branch(lir_cond_equal, value, i + lo_key, T_INT, x->sux_at(i)); } __ jump(x->default_sux()); } @@ -2572,8 +2568,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { } else { int len = x->length(); for (int i = 0; i < len; i++) { 
- __ cmp(lir_cond_equal, value, x->key_at(i)); - __ branch(lir_cond_equal, T_INT, x->sux_at(i)); + __ cmp_branch(lir_cond_equal, value, x->key_at(i), T_INT, x->sux_at(i)); } __ jump(x->default_sux()); } @@ -2625,7 +2620,6 @@ void LIRGenerator::do_Goto(Goto* x) { } LIR_Opr md_reg = new_register(T_METADATA); __ metadata2reg(md->constant_encoding(), md_reg); - increment_counter(new LIR_Address(md_reg, offset, NOT_LP64(T_INT) LP64_ONLY(T_LONG)), DataLayout::counter_increment); } @@ -3079,8 +3073,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { f_val.dont_load_item(); LIR_Opr reg = rlock_result(x); - __ cmp(lir_cond(x->cond()), left.result(), right.result()); - __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + __ cmp_cmove(lir_cond(x->cond()), left.result(), right.result(), + t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); } #ifdef JFR_HAVE_INTRINSICS @@ -3120,8 +3114,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { T_OBJECT); LIR_Opr result = rlock_result(x); __ move_wide(jobj_addr, result); - __ cmp(lir_cond_equal, result, LIR_OprFact::oopConst(NULL)); - __ branch(lir_cond_equal, T_OBJECT, L_end->label()); + __ cmp_branch(lir_cond_equal, result, LIR_OprFact::oopConst(0), T_OBJECT, L_end->label()); __ move_wide(new LIR_Address(result, T_OBJECT), result); __ branch_destination(L_end->label()); @@ -3485,10 +3478,9 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_Opr meth = new_register(T_METADATA); __ metadata2reg(method->constant_encoding(), meth); __ logical_and(result, mask, result); - __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); // The bci for info can point to cmp for if's we want the if bci CodeStub* overflow = new CounterOverflowStub(info, bci, meth); - __ branch(lir_cond_equal, T_INT, overflow); + __ cmp_branch(lir_cond_equal, result, LIR_OprFact::intConst(0), T_INT, overflow); __ branch_destination(overflow->continuation()); } } @@ -3600,8 +3592,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { CodeEmitInfo *info = state_for(x, x->state()); CodeStub* stub = new PredicateFailedStub(info); - __ cmp(lir_cond(cond), left, right); - __ branch(lir_cond(cond), right->type(), stub); + __ cmp_branch(lir_cond(cond), left, right, right->type(), stub); } } @@ -3749,8 +3740,7 @@ LIR_Opr LIRGenerator::maybe_mask_boolean(StoreIndexed* x, LIR_Opr array, LIR_Opr __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); int diffbit = Klass::layout_helper_boolean_diffbit(); __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); - __ cmp(lir_cond_notEqual, layout, LIR_OprFact::intConst(0)); - __ cmove(lir_cond_notEqual, value_fixed, value, value_fixed, T_BYTE); + __ cmp_cmove(lir_cond_notEqual, layout, LIR_OprFact::intConst(0), value_fixed, value, value_fixed, T_BYTE); value = value_fixed; } return value; diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp index 27be79fee14..57c253db690 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2015. These + * modifications are Copyright (c) 2015 Loongson Technology, and are made + * available on the same license terms set forth above. 
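
The LIRGenerator hunks above fold the two-node cmp/branch idiom into a single cmp_branch node. The likely reasoning, stated here as a hedge since the patch itself does not spell it out: LoongArch, like MIPS, has no integer condition-flags register, so a conditional branch (beq/bne/blt) consumes both source registers directly and the IR node that emits it must still carry both compare operands. A minimal stand-alone sketch of the fused shape, with purely illustrative names that are not HotSpot's:

    #include <cstdio>

    // Illustrative only, not HotSpot's LIR classes. The point is that the
    // condition, both operands and the target live on one node, which maps
    // 1:1 onto a flag-less "beq/bne/blt rj, rd, label" instruction.
    enum Cond { EQ, NE, LT };
    struct CmpBranch { Cond cond; int lhs_reg, rhs_reg; const char* target; };

    static void emit(const CmpBranch& op) {
      static const char* mnemonic[] = { "beq", "bne", "blt" };
      std::printf("  %s $r%d, $r%d, %s\n",
                  mnemonic[op.cond], op.lhs_reg, op.rhs_reg, op.target);
    }

    int main() {
      emit(CmpBranch{NE, 4, 5, "Lcont"});   // "if (r4 != r5) goto Lcont"
      return 0;
    }
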
+ */ + #ifndef SHARE_VM_C1_C1_LIRGENERATOR_HPP #define SHARE_VM_C1_C1_LIRGENERATOR_HPP @@ -246,6 +252,9 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void do_getClass(Intrinsic* x); void do_currentThread(Intrinsic* x); void do_MathIntrinsic(Intrinsic* x); +#if defined(LOONGARCH64) + void do_LibmIntrinsic(Intrinsic* x); +#endif void do_ArrayCopy(Intrinsic* x); void do_CompareAndSwap(Intrinsic* x, ValueType* type); void do_NIOCheckIndex(Intrinsic* x); @@ -335,8 +344,10 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void new_instance (LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info); // machine dependent - void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); - void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info); + template + void cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info); + template + void cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info); void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info); void arraycopy_helper(Intrinsic* x, int* flags, ciArrayKlass** expected_type); @@ -364,7 +375,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { LIR_Opr safepoint_poll_register(); - void profile_branch(If* if_instr, If::Condition cond); + void profile_branch(If* if_instr, If::Condition cond, LIR_Opr left = LIR_OprFact::illegalOpr, LIR_Opr right = LIR_OprFact::illegalOpr); void increment_event_counter_impl(CodeEmitInfo* info, ciMethod *method, int frequency, int bci, bool backedge, bool notify); diff --git a/hotspot/src/share/vm/c1/c1_LinearScan.cpp b/hotspot/src/share/vm/c1/c1_LinearScan.cpp index 1f6281bf250..4549ff09282 100644 --- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp +++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
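
The do_IfOp and maybe_mask_boolean hunks above fuse cmp followed by cmove into a single four-operand cmp_cmove, and the c1_LinearScan.cpp changes that follow treat it as a LIR_Op4. On a flag-less target such a select is typically lowered with mask-and-merge instructions (LoongArch provides maskeqz/masknez for this). The stand-alone, branch-free sketch below imitates that lowering; the function name and types are illustrative:

    #include <cstdint>
    #include <cstdio>

    // Branch-free "compare and conditionally move": result = (a == b) ? t : f,
    // written the way a flag-less backend might lower it (mask and merge).
    static int64_t cmp_cmove_eq(int64_t a, int64_t b, int64_t t_val, int64_t f_val) {
      const int64_t mask = -static_cast<int64_t>(a == b);  // all ones or all zeros
      return (t_val & mask) | (f_val & ~mask);
    }

    int main() {
      std::printf("%lld\n", (long long)cmp_cmove_eq(3, 3, 10, 20));  // prints 10
      std::printf("%lld\n", (long long)cmp_cmove_eq(3, 4, 10, 20));  // prints 20
      return 0;
    }
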
+ */ + #include "precompiled.hpp" #include "c1/c1_CFGPrinter.hpp" #include "c1/c1_CodeStubs.hpp" @@ -35,6 +41,9 @@ #ifdef TARGET_ARCH_x86 # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "vmreg_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "vmreg_aarch64.inline.hpp" #endif @@ -1256,6 +1265,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { LIR_Opr move_from = cmove->in_opr1(); LIR_Opr move_to = cmove->result_opr(); + if (move_to->is_register() && move_from->is_register()) { + Interval* from = interval_at(reg_num(move_from)); + Interval* to = interval_at(reg_num(move_to)); + if (from != NULL && to != NULL) { + to->set_register_hint(from); + TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num())); + } + } + break; + } + case lir_cmp_cmove: { + assert(op->as_Op4() != NULL, "lir_cmp_cmove must be LIR_Op4"); + LIR_Op4* cmove = (LIR_Op4*)op; + + LIR_Opr move_from = cmove->in_opr3(); + LIR_Opr move_to = cmove->result_opr(); + if (move_to->is_register() && move_from->is_register()) { Interval* from = interval_at(reg_num(move_from)); Interval* to = interval_at(reg_num(move_to)); @@ -2104,7 +2130,7 @@ LIR_Opr LinearScan::calc_operand_for_interval(const Interval* interval) { #ifdef _LP64 return LIR_OprFact::double_cpu(assigned_reg, assigned_reg); #else -#if defined(SPARC) || defined(PPC) +#if defined(SPARC) || defined(PPC) || defined(LOONGARCH) return LIR_OprFact::double_cpu(assigned_regHi, assigned_reg); #else return LIR_OprFact::double_cpu(assigned_reg, assigned_regHi); @@ -3285,7 +3311,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { check_live = (move->patch_code() == lir_patch_none); } LIR_OpBranch* branch = op->as_OpBranch(); - if (branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) { + LIR_OpCmpBranch* cmp_branch = op->as_OpCmpBranch(); + if ((branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) || + (cmp_branch != NULL && cmp_branch->stub() != NULL && cmp_branch->stub()->is_exception_throw_stub())) { // Don't bother checking the stub in this case since the // exception stub will never return to normal control flow. 
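
add_register_hints above now also looks through lir_cmp_cmove (the new LIR_Op4) and hints the allocator that the result interval should prefer the register already holding one of the select inputs, so linear scan can often coalesce the move away. A toy model of what such a hint buys; none of these types mirror HotSpot's real data structures:

    #include <array>
    #include <cstdio>

    // Toy linear-scan flavor: an interval may hint at another interval, and the
    // allocator prefers the hinted register when it is still free.
    struct Interval { int assigned = -1; const Interval* hint = nullptr; };

    static int pick_register(const Interval& it, const std::array<bool, 4>& is_free) {
      if (it.hint && it.hint->assigned >= 0 && is_free[it.hint->assigned])
        return it.hint->assigned;                      // coalesce with the hinted value
      for (int r = 0; r < (int)is_free.size(); r++)    // otherwise first free register
        if (is_free[r]) return r;
      return -1;                                       // would spill
    }

    int main() {
      Interval input;  input.assigned = 2;             // cmove input already lives in r2
      Interval result; result.hint = &input;           // hint recorded for the cmove result
      std::array<bool, 4> is_free = { true, true, true, true };
      std::printf("result gets r%d, so the copy disappears\n",
                  pick_register(result, is_free));
      return 0;
    }
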
check_live = false; @@ -6142,6 +6170,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); LIR_OpBranch* branch = (LIR_OpBranch*)op; + if (branch->block() == target_from) { + branch->change_block(target_to); + } + if (branch->ublock() == target_from) { + branch->change_ublock(target_to); + } + } else if (op->code() == lir_cmp_branch || op->code() == lir_cmp_float_branch) { + assert(op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); + LIR_OpCmpBranch* branch = (LIR_OpCmpBranch*)op; + if (branch->block() == target_from) { branch->change_block(target_to); } @@ -6252,6 +6290,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { instructions->truncate(instructions->length() - 1); } } + } else if (prev_op->code() == lir_cmp_branch || prev_op->code() == lir_cmp_float_branch) { + assert(prev_op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); + LIR_OpCmpBranch* prev_branch = (LIR_OpCmpBranch*)prev_op; + + if (prev_branch->stub() == NULL) { + if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) { + TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id())); + + // eliminate a conditional branch to the immediate successor + prev_branch->change_block(last_branch->block()); + prev_branch->negate_cond(); + instructions->trunc_to(instructions->length() - 1); + } + } } } } @@ -6328,6 +6380,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { assert(op_branch->block() == NULL || code->index_of(op_branch->block()) != -1, "branch target not valid"); assert(op_branch->ublock() == NULL || code->index_of(op_branch->ublock()) != -1, "branch target not valid"); } + + LIR_OpCmpBranch* op_cmp_branch = instructions->at(j)->as_OpCmpBranch(); + + if (op_cmp_branch != NULL) { + assert(op_cmp_branch->block() == NULL || code->find(op_cmp_branch->block()) != -1, "branch target not valid"); + assert(op_cmp_branch->ublock() == NULL || code->find(op_cmp_branch->ublock()) != -1, "branch target not valid"); + } } for (j = 0; j < block->number_of_sux() - 1; j++) { @@ -6571,6 +6630,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { break; } + case lir_cmp_branch: + case lir_cmp_float_branch: { + LIR_OpCmpBranch* branch = op->as_OpCmpBranch(); + if (branch->block() == NULL) { + inc_counter(counter_stub_branch); + } else { + inc_counter(counter_cond_branch); + } + inc_counter(counter_cmp); + break; + } + + case lir_cmp_cmove: { + inc_counter(counter_misc_inst); + inc_counter(counter_cmp); + break; + } + case lir_neg: case lir_add: case lir_sub: diff --git a/hotspot/src/share/vm/c1/c1_LinearScan.hpp b/hotspot/src/share/vm/c1/c1_LinearScan.hpp index 96e6b3babff..576a07d73dc 100644 --- a/hotspot/src/share/vm/c1/c1_LinearScan.hpp +++ b/hotspot/src/share/vm/c1/c1_LinearScan.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
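
delete_unnecessary_jumps and substitute_branch_target above gain matching cases for LIR_OpCmpBranch: when a block ends with a conditional branch whose target is the fall-through block, followed by an unconditional jump, the condition is negated and retargeted so the trailing jump can be dropped. The transformation in miniature, on a hypothetical textual form:

    #include <cstdio>
    #include <cstring>

    // Before:  cmp_branch cond, L_next    (L_next is the fall-through block)
    //          jump       L_far
    // After:   cmp_branch !cond, L_far    (the unconditional jump is deleted)
    struct CondBranch { const char* cond; const char* target; };

    static const char* negate(const char* cond) {
      return std::strcmp(cond, "==") == 0 ? "!=" : "==";   // toy: only == and !=
    }

    int main() {
      CondBranch branch        = { "==", "L_next" };
      const char* fall_through = "L_next";
      const char* jump_target  = "L_far";
      if (std::strcmp(branch.target, fall_through) == 0) {
        branch.cond   = negate(branch.cond);
        branch.target = jump_target;
        std::printf("cmp_branch %s, %s   // trailing jump removed\n",
                    branch.cond, branch.target);
      }
      return 0;
    }
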
+ */ + #ifndef SHARE_VM_C1_C1_LINEARSCAN_HPP #define SHARE_VM_C1_C1_LINEARSCAN_HPP @@ -976,6 +982,9 @@ class LinearScanTimers : public StackObj { #ifdef TARGET_ARCH_x86 # include "c1_LinearScan_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_LinearScan_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_LinearScan_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp b/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp index 7e22bbaa270..12aca7bf50c 100644 --- a/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp +++ b/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_C1_C1_MACROASSEMBLER_HPP #define SHARE_VM_C1_C1_MACROASSEMBLER_HPP @@ -50,6 +56,9 @@ class C1_MacroAssembler: public MacroAssembler { #ifdef TARGET_ARCH_x86 # include "c1_MacroAssembler_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_MacroAssembler_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_MacroAssembler_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_Runtime1.cpp b/hotspot/src/share/vm/c1/c1_Runtime1.cpp index aebc377527c..f1253506f67 100644 --- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp +++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "asm/codeBuffer.hpp" #include "c1/c1_CodeStubs.hpp" @@ -710,6 +716,7 @@ JRT_ENTRY(void, Runtime1::deoptimize(JavaThread* thread)) // Return to the now deoptimized frame. JRT_END +#ifndef LOONGARCH static Klass* resolve_field_return_klass(methodHandle caller, int bci, TRAPS) { Bytecode_field field_access(caller, bci); @@ -1186,6 +1193,47 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_i } JRT_END +#else + +JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_id )) +{ + RegisterMap reg_map(thread, false); + + NOT_PRODUCT(_patch_code_slowcase_cnt++;) + // According to the LoongArch, "Concurrent modification and + // execution of instructions can lead to the resulting instruction + // performing any behavior that can be achieved by executing any + // sequence of instructions that can be executed from the same + // Exception level, except where the instruction before + // modification and the instruction after modification is a B, BL, + // NOP, BRK instruction." + // + // This effectively makes the games we play when patching + // impossible, so when we come across an access that needs + // patching we must deoptimize. + + if (TracePatching) { + tty->print_cr("Deoptimizing because patch is needed"); + } + + frame runtime_frame = thread->last_frame(); + frame caller_frame = runtime_frame.sender(®_map); + + // It's possible the nmethod was invalidated in the last + // safepoint, but if it's still alive then make it not_entrant. + nmethod* nm = CodeCache::find_nmethod(caller_frame.pc()); + if (nm != NULL) { + nm->make_not_entrant(); + } + + Deoptimization::deoptimize_frame(thread, caller_frame.id()); + + // Return to the now deoptimized frame. +} +JRT_END + +#endif + // // Entry point for compiled code. We want to patch a nmethod. 
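
The LoongArch variant of Runtime1::patch_code above never patches in place: it finds the caller nmethod, marks it not_entrant and deoptimizes the caller frame. The quoted justification appears carried over from the AArch64 port's wording, but the underlying concern applies here as well: a constant or address is materialized by a multi-instruction sequence (for example lu12i.w / ori / lu32i.d / lu52i.d for a 64-bit immediate), so a thread executing the sequence while another thread rewrites it can observe a mix of old and new words. The toy below only simulates that interleaving in one thread, to show the torn value deoptimization avoids:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t code[2] = { 0x1111, 0x2222 };        // "old" immediate, split in two
      // patcher writes the new value one word at a time ...
      code[0] = 0xAAAA;
      // ... and an executing thread happens to read here, between the two stores:
      uint64_t seen = (uint64_t(code[0]) << 32) | code[1];
      code[1] = 0xBBBB;
      std::printf("executed with immediate 0x%llx (neither old nor new)\n",
                  (unsigned long long)seen);
      return 0;
    }

Deoptimizing to the interpreter sidesteps the race entirely, at the cost of re-resolving the field or class on the slow path.
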
// We don't do a normal VM transition here because we want to diff --git a/hotspot/src/share/vm/c1/c1_globals.hpp b/hotspot/src/share/vm/c1/c1_globals.hpp index 8f7f9f61c90..0e2d926bdfb 100644 --- a/hotspot/src/share/vm/c1/c1_globals.hpp +++ b/hotspot/src/share/vm/c1/c1_globals.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_C1_C1_GLOBALS_HPP #define SHARE_VM_C1_C1_GLOBALS_HPP @@ -29,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "c1_globals_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_globals_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_globals_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp b/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp index f067419ffcd..5aa19dc84f0 100644 --- a/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp +++ b/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/bytecodeAssembler.hpp" @@ -32,6 +38,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif diff --git a/hotspot/src/share/vm/classfile/classFileStream.hpp b/hotspot/src/share/vm/classfile/classFileStream.hpp index 9632c8c8c24..fad25c44fc3 100644 --- a/hotspot/src/share/vm/classfile/classFileStream.hpp +++ b/hotspot/src/share/vm/classfile/classFileStream.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_CLASSFILE_CLASSFILESTREAM_HPP #define SHARE_VM_CLASSFILE_CLASSFILESTREAM_HPP @@ -29,6 +35,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/classfile/stackMapTable.hpp b/hotspot/src/share/vm/classfile/stackMapTable.hpp index a36a7ba3cfd..d7c1f086442 100644 --- a/hotspot/src/share/vm/classfile/stackMapTable.hpp +++ b/hotspot/src/share/vm/classfile/stackMapTable.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP #define SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP @@ -34,6 +40,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/classfile/verifier.cpp b/hotspot/src/share/vm/classfile/verifier.cpp index 2dddd1fdedc..4a20d15f311 100644 --- a/hotspot/src/share/vm/classfile/verifier.cpp +++ b/hotspot/src/share/vm/classfile/verifier.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/classFileStream.hpp" #include "classfile/javaClasses.hpp" @@ -48,6 +54,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/code/codeBlob.cpp b/hotspot/src/share/vm/code/codeBlob.cpp index aff2aaf0ca7..9ba76007cd0 100644 --- a/hotspot/src/share/vm/code/codeBlob.cpp +++ b/hotspot/src/share/vm/code/codeBlob.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "code/codeBlob.hpp" #include "code/codeCache.hpp" @@ -57,6 +63,12 @@ #ifdef TARGET_ARCH_ppc # include "nativeInst_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +#endif #ifdef COMPILER1 #include "c1/c1_Runtime1.hpp" #endif diff --git a/hotspot/src/share/vm/code/compiledIC.hpp b/hotspot/src/share/vm/code/compiledIC.hpp index f910f11886e..e282a3f3afe 100644 --- a/hotspot/src/share/vm/code/compiledIC.hpp +++ b/hotspot/src/share/vm/code/compiledIC.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_CODE_COMPILEDIC_HPP #define SHARE_VM_CODE_COMPILEDIC_HPP @@ -45,6 +51,12 @@ #ifdef TARGET_ARCH_ppc # include "nativeInst_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +#endif //----------------------------------------------------------------------------- // The CompiledIC represents a compiled inline cache. diff --git a/hotspot/src/share/vm/code/relocInfo.hpp b/hotspot/src/share/vm/code/relocInfo.hpp index ad55a2fd93a..813504821d3 100644 --- a/hotspot/src/share/vm/code/relocInfo.hpp +++ b/hotspot/src/share/vm/code/relocInfo.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_CODE_RELOCINFO_HPP #define SHARE_VM_CODE_RELOCINFO_HPP @@ -261,7 +267,11 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { poll_return_type = 11, // polling instruction for safepoints at return metadata_type = 12, // metadata that used to be oops trampoline_stub_type = 13, // stub-entry for trampoline +#if !defined MIPS64 yet_unused_type_1 = 14, // Still unused +#else + internal_pc_type = 14, // tag for internal data,?? +#endif data_prefix_tag = 15, // tag for a prefix (carries data arguments) type_mask = 15 // A mask which selects only the above values }; @@ -288,6 +298,7 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { ; #endif +#if defined MIPS64 && !defined ZERO #define APPLY_TO_RELOCATIONS(visitor) \ visitor(oop) \ visitor(metadata) \ @@ -300,9 +311,26 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { visitor(internal_word) \ visitor(poll) \ visitor(poll_return) \ - visitor(section_word) \ visitor(trampoline_stub) \ + visitor(internal_pc) \ +#else + #define APPLY_TO_RELOCATIONS(visitor) \ + visitor(oop) \ + visitor(metadata) \ + visitor(virtual_call) \ + visitor(opt_virtual_call) \ + visitor(static_call) \ + visitor(static_stub) \ + visitor(runtime_call) \ + visitor(external_word) \ + visitor(internal_word) \ + visitor(poll) \ + visitor(poll_return) \ + visitor(trampoline_stub) \ + visitor(section_word) \ + +#endif public: enum { @@ -432,6 +460,12 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { #endif #ifdef TARGET_ARCH_ppc # include "relocInfo_ppc.hpp" +#endif +#ifdef TARGET_ARCH_mips +# include "relocInfo_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "relocInfo_loongarch.hpp" #endif @@ -1024,6 +1058,15 @@ class metadata_Relocation : public DataRelocation { // Note: metadata_value transparently converts Universe::non_metadata_word to NULL. }; +#if defined MIPS64 +// to handle the set_last_java_frame pc +class internal_pc_Relocation : public Relocation { + relocInfo::relocType type() { return relocInfo::internal_pc_type; } + public: + address pc() { return pd_get_address_from_code(); } + void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest); +}; +#endif class virtual_call_Relocation : public CallRelocation { relocInfo::relocType type() { return relocInfo::virtual_call_type; } diff --git a/hotspot/src/share/vm/code/vmreg.hpp b/hotspot/src/share/vm/code/vmreg.hpp index 07b595b60a0..5bc7131a8a0 100644 --- a/hotspot/src/share/vm/code/vmreg.hpp +++ b/hotspot/src/share/vm/code/vmreg.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
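
APPLY_TO_RELOCATIONS is an X-macro: the relocation kinds are listed once, and each expansion site decides what to generate from them, which is why adding internal_pc for the MIPS64 build only needs a new visitor(...) line in the list. The pattern in isolation, with illustrative names:

    #include <cstdio>

    // X-macro: write the list once, let each expansion decide what to generate.
    #define APPLY_TO_KINDS(visitor) \
      visitor(oop)                  \
      visitor(metadata)             \
      visitor(internal_pc)

    #define DECLARE_ENUM(name)  name##_type,
    #define DECLARE_NAME(name)  #name,

    enum Kind { APPLY_TO_KINDS(DECLARE_ENUM) kind_count };
    static const char* kind_names[] = { APPLY_TO_KINDS(DECLARE_NAME) };

    int main() {
      for (int i = 0; i < kind_count; i++)
        std::printf("%d -> %s\n", i, kind_names[i]);
      return 0;
    }
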
+ */ + #ifndef SHARE_VM_CODE_VMREG_HPP #define SHARE_VM_CODE_VMREG_HPP @@ -47,6 +53,12 @@ #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/adGlobals_ppc_64.hpp" #endif +#ifdef TARGET_ARCH_MODEL_mips_64 +# include "adfiles/adGlobals_mips_64.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/adGlobals_loongarch_64.hpp" +#endif #endif //------------------------------VMReg------------------------------------------ @@ -158,6 +170,12 @@ friend class OptoReg; #ifdef TARGET_ARCH_x86 # include "vmreg_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vmreg_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vmreg_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "vmreg_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/compiler/disassembler.cpp b/hotspot/src/share/vm/compiler/disassembler.cpp index dfdd5f77e79..2dd0ff69ac4 100644 --- a/hotspot/src/share/vm/compiler/disassembler.cpp +++ b/hotspot/src/share/vm/compiler/disassembler.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/javaClasses.hpp" #include "code/codeCache.hpp" @@ -50,6 +56,12 @@ #ifdef TARGET_ARCH_ppc # include "depChecker_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "depChecker_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "depChecker_loongarch.hpp" +#endif #ifdef SHARK #include "shark/sharkEntry.hpp" #endif diff --git a/hotspot/src/share/vm/compiler/disassembler.hpp b/hotspot/src/share/vm/compiler/disassembler.hpp index 168851cc264..8b632748f23 100644 --- a/hotspot/src/share/vm/compiler/disassembler.hpp +++ b/hotspot/src/share/vm/compiler/disassembler.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_COMPILER_DISASSEMBLER_HPP #define SHARE_VM_COMPILER_DISASSEMBLER_HPP @@ -95,6 +101,12 @@ class Disassembler { #endif #ifdef TARGET_ARCH_ppc # include "disassembler_ppc.hpp" +#endif +#ifdef TARGET_ARCH_mips +# include "disassembler_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "disassembler_loongarch.hpp" #endif diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp index 733b5c91ad9..678a1ee8367 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp @@ -86,6 +86,9 @@ class CardTableExtension : public CardTableModRefBS { void inline_write_ref_field_gc(void* field, oop new_val) { jbyte* byte = byte_for(field); *byte = youngergen_card; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } // Adaptive size policy support diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp index 1dde10746d2..8b800b31c55 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp @@ -105,6 +105,9 @@ ParMarkBitMap::mark_obj(HeapWord* addr, size_t size) assert(end_bit_ok, "concurrency problem"); DEBUG_ONLY(Atomic::inc_ptr(&mark_bitmap_count)); DEBUG_ONLY(Atomic::add_ptr(size, &mark_bitmap_size)); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif return true; } return false; diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp index 6cf76353d9c..4d34bc209bd 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp @@ -33,6 +33,9 @@ void ParCompactionManager::push_objarray(oop obj, size_t index) ObjArrayTask task(obj, index); assert(task.is_valid(), "bad ObjArrayTask"); _objarray_stack.push(task); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } void ParCompactionManager::push_region(size_t index) @@ -44,6 +47,9 @@ void ParCompactionManager::push_region(size_t index) assert(region_ptr->_pushed++ == 0, "should only be pushed once"); #endif region_stack()->push(index); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } #endif // SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSCOMPACTIONMANAGER_INLINE_HPP diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp index 0fa980ef83c..2f66493e0a8 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp @@ -499,6 +499,9 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len) if (beg_region == end_region) { // All in one region. 
_region_data[beg_region].add_live_obj(len); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif return; } @@ -517,6 +520,9 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len) const size_t end_ofs = region_offset(addr + len - 1); _region_data[end_region].set_partial_obj_size(end_ofs + 1); _region_data[end_region].set_partial_obj_addr(addr); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } void @@ -3229,6 +3235,9 @@ void PSParallelCompact::fill_blocks(size_t region_idx) if (new_block != cur_block) { cur_block = new_block; sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits)); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end); diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp index 881f380ceab..461b83930ff 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp @@ -1329,6 +1329,9 @@ inline bool PSParallelCompact::mark_obj(oop obj) { const int obj_size = obj->size(); if (mark_bitmap()->mark_obj(obj, obj_size)) { _summary_data.add_obj(obj, obj_size); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif return true; } else { return false; @@ -1363,6 +1366,9 @@ inline void PSParallelCompact::mark_and_push(ParCompactionManager* cm, T* p) { oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); if (mark_bitmap()->is_unmarked(obj) && mark_obj(obj)) { cm->push(obj); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } } } diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp index a33132009c3..291019660a8 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp @@ -41,8 +41,9 @@ template inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { if (p != NULL) { // XXX: error if p != NULL here oop o = oopDesc::load_decode_heap_oop_not_null(p); - if (o->is_forwarded()) { - o = o->forwardee(); + markOop m = o->mark(); + if (m->is_marked()) { + o = (oop) m->decode_pointer(); // Card mark if (PSScavenge::is_obj_in_young(o)) { PSScavenge::card_table()->inline_write_ref_field_gc(p, o); @@ -102,11 +103,19 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { oop new_obj = NULL; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + // NOTE! We must be very careful with any methods that access the mark // in o. There may be multiple threads racing on it, and it may be forwarded // at any time. Do not use oop methods for accessing the mark! 
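
The recurring "if (UseSyncLevel >= 2000) OrderAccess::fence();" insertions above and below follow card-table, mark-bitmap, task-queue and forwarding updates on the GC side: MIPS and LoongArch are weakly ordered, and the port makes the extra full fences tunable through the UseSyncLevel flag rather than unconditional. The shape of the pattern in portable form, with std::atomic_thread_fence standing in for OrderAccess::fence (UseSyncLevel and its threshold come from the patch, everything else is illustrative):

    #include <atomic>
    #include <cstddef>

    static int UseSyncLevel = 2000;          // tunable, as in the patch
    static unsigned char card_table[1024];

    static void mark_card_dirty(std::size_t index) {
      card_table[index] = 0;                 // plain store of the dirty value
      if (UseSyncLevel >= 2000)
        std::atomic_thread_fence(std::memory_order_seq_cst);  // publish on weak memory
    }

    int main() {
      mark_card_dirty(42);
      return 0;
    }
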
markOop test_mark = o->mark(); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + // The same test as "o->is_forwarded()" if (!test_mark->is_marked()) { bool new_obj_is_tenured = false; @@ -141,6 +150,10 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { } } } + +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } } @@ -200,6 +213,9 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { // Copy obj Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif // Now we have to CAS in the header. if (o->cas_forward_to(new_obj, test_mark)) { @@ -247,6 +263,10 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { // don't update this before the unallocation! new_obj = o->forwardee(); } + +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } else { assert(o->is_forwarded(), "Sanity"); new_obj = o->forwardee(); diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp index 1a722a7ca72..4980be3946c 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp @@ -71,14 +71,22 @@ inline void PSScavenge::copy_and_push_safe_barrier(PSPromotionManager* pm, assert(should_scavenge(p, true), "revisiting object?"); oop o = oopDesc::load_decode_heap_oop_not_null(p); - oop new_obj = o->is_forwarded() - ? o->forwardee() - : pm->copy_to_survivor_space(o); +#if defined MIPS || defined LOONGARCH + if (oopDesc::is_null(o)) return; +#endif + + oop new_obj; + markOop m = o->mark(); + if (m->is_marked()) { + new_obj = (oop) m->decode_pointer(); + } else { + new_obj = pm->copy_to_survivor_space(o); + } #ifndef PRODUCT // This code must come after the CAS test, or it will print incorrect // information. - if (TraceScavenge && o->is_forwarded()) { + if (TraceScavenge && m->is_marked()) { gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", "forwarding", new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); @@ -138,8 +146,9 @@ class PSScavengeFromKlassClosure: public OopClosure { oop o = *p; oop new_obj; - if (o->is_forwarded()) { - new_obj = o->forwardee(); + markOop m = o->mark(); + if (m->is_marked()) { + new_obj = (oop) m->decode_pointer(); } else { new_obj = _pm->copy_to_survivor_space(o); } diff --git a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp index e14c50bf01c..8b3860070c1 100644 --- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp +++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
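
copy_and_push_safe_barrier and PSScavengeFromKlassClosure above now load the mark word once and decode the forwardee from that single value, instead of calling is_forwarded() and then forwardee() as two separate loads; on a weakly ordered machine the object can be forwarded between those two reads. A stand-alone illustration of the load-once discipline, with std::atomic and a low tag bit standing in for the real mark-word encoding:

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    // Toy "mark word": low bit = forwarded, remaining bits = forwarding pointer.
    static std::atomic<uintptr_t> mark{0};

    static uintptr_t forwardee_or_zero() {
      uintptr_t m = mark.load(std::memory_order_acquire);  // single load ...
      if (m & 1)                                            // ... tested ...
        return m & ~uintptr_t(1);                           // ... and decoded from m
      return 0;   // not forwarded at the time of the load
    }

    int main() {
      mark.store(uintptr_t(0x1000) | 1, std::memory_order_release);  // forward to 0x1000
      std::printf("forwardee = %#lx\n", (unsigned long)forwardee_or_zero());
      return 0;
    }
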
+ */ + #ifndef SHARE_VM_INTERPRETER_ABSTRACTINTERPRETER_HPP #define SHARE_VM_INTERPRETER_ABSTRACTINTERPRETER_HPP @@ -42,6 +48,10 @@ # include "interp_masm_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "interp_masm_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "interp_masm_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "interp_masm_loongarch_64.hpp" #endif // This file contains the platform-independent parts diff --git a/hotspot/src/share/vm/interpreter/bytecode.hpp b/hotspot/src/share/vm/interpreter/bytecode.hpp index 7e55fd009a2..a06dcd58bc3 100644 --- a/hotspot/src/share/vm/interpreter/bytecode.hpp +++ b/hotspot/src/share/vm/interpreter/bytecode.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_BYTECODE_HPP #define SHARE_VM_INTERPRETER_BYTECODE_HPP @@ -31,6 +37,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp index 28843715c75..c17fe8d7e05 100644 --- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp +++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_HPP #define SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_HPP @@ -35,6 +41,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif @@ -592,6 +601,12 @@ void print(); #ifdef TARGET_ARCH_x86 # include "bytecodeInterpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytecodeInterpreter_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytecodeInterpreter_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytecodeInterpreter_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp index f5db0b4d9d2..8adbf95acb5 100644 --- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp +++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_INLINE_HPP #define SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_INLINE_HPP @@ -46,6 +52,12 @@ #ifdef TARGET_ARCH_x86 # include "bytecodeInterpreter_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytecodeInterpreter_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytecodeInterpreter_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytecodeInterpreter_aarch64.inline.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/bytecodeStream.hpp b/hotspot/src/share/vm/interpreter/bytecodeStream.hpp index b814b88d5df..e1f2421600c 100644 --- a/hotspot/src/share/vm/interpreter/bytecodeStream.hpp +++ b/hotspot/src/share/vm/interpreter/bytecodeStream.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_BYTECODESTREAM_HPP #define SHARE_VM_INTERPRETER_BYTECODESTREAM_HPP @@ -32,6 +38,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/bytecodes.cpp b/hotspot/src/share/vm/interpreter/bytecodes.cpp index ce5632ea19b..7f8e8801997 100644 --- a/hotspot/src/share/vm/interpreter/bytecodes.cpp +++ b/hotspot/src/share/vm/interpreter/bytecodes.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "interpreter/bytecodes.hpp" #include "memory/resourceArea.hpp" @@ -29,6 +35,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/bytecodes.hpp b/hotspot/src/share/vm/interpreter/bytecodes.hpp index c3463cd76df..bdf4c487f00 100644 --- a/hotspot/src/share/vm/interpreter/bytecodes.hpp +++ b/hotspot/src/share/vm/interpreter/bytecodes.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_BYTECODES_HPP #define SHARE_VM_INTERPRETER_BYTECODES_HPP @@ -292,6 +298,12 @@ class Bytecodes: AllStatic { #ifdef TARGET_ARCH_x86 # include "bytecodes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytecodes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytecodes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytecodes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp index 6a6447503cf..f9c540fb4a1 100644 --- a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp +++ b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. 
These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_CPPINTERPRETER_HPP #define SHARE_VM_INTERPRETER_CPPINTERPRETER_HPP @@ -84,6 +90,12 @@ class CppInterpreter: public AbstractInterpreter { #ifdef TARGET_ARCH_x86 # include "cppInterpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "cppInterpreter_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "cppInterpreter_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "cppInterpreter_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp index 6a08a3f43f7..1fd19994d76 100644 --- a/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp +++ b/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_CPPINTERPRETERGENERATOR_HPP #define SHARE_VM_INTERPRETER_CPPINTERPRETERGENERATOR_HPP @@ -50,6 +56,12 @@ class CppInterpreterGenerator: public AbstractInterpreterGenerator { #ifdef TARGET_ARCH_x86 # include "cppInterpreterGenerator_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "cppInterpreterGenerator_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "cppInterpreterGenerator_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "cppInterpreterGenerator_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/interpreter.hpp b/hotspot/src/share/vm/interpreter/interpreter.hpp index ebfb68d36b4..610949f3f77 100644 --- a/hotspot/src/share/vm/interpreter/interpreter.hpp +++ b/hotspot/src/share/vm/interpreter/interpreter.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_INTERPRETER_HPP #define SHARE_VM_INTERPRETER_INTERPRETER_HPP @@ -148,6 +154,12 @@ class Interpreter: public CC_INTERP_ONLY(CppInterpreter) NOT_CC_INTERP(TemplateI #ifdef TARGET_ARCH_x86 # include "interpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "interpreter_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "interpreter_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "interpreter_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp index 1dc7cb29833..92bbe6b4407 100644 --- a/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp +++ b/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERGENERATOR_HPP #define SHARE_VM_INTERPRETER_INTERPRETERGENERATOR_HPP @@ -44,6 +50,12 @@ InterpreterGenerator(StubQueue* _code); #ifdef TARGET_ARCH_x86 # include "interpreterGenerator_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "interpreterGenerator_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "interpreterGenerator_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "interpreterGenerator_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp index 5d2845383ca..f48622f67ef 100644 --- a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp +++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -59,6 +65,12 @@ #ifdef TARGET_ARCH_x86 # include "vm_version_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vm_version_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vm_version_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "vm_version_aarch64.hpp" #endif @@ -1290,7 +1302,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth // preparing the same method will be sure to see non-null entry & mirror. IRT_END -#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) +#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) if (src_address == dest_address) { return; diff --git a/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp b/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp index 472bf4d94cc..9a98d5559c8 100644 --- a/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp +++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP #define SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP @@ -156,7 +162,7 @@ class InterpreterRuntime: AllStatic { Method* method, intptr_t* from, intptr_t* to); -#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) +#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) // Popframe support (only needed on x86, AMD64 and ARM) static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); #endif @@ -165,6 +171,12 @@ class InterpreterRuntime: AllStatic { #ifdef TARGET_ARCH_x86 # include "interpreterRT_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "interpreterRT_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "interpreterRT_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "interpreterRT_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp index 5f76dca8a6f..757860f43cc 100644 --- a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp +++ b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETER_HPP #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETER_HPP @@ -190,6 +196,12 @@ class TemplateInterpreter: public AbstractInterpreter { #ifdef TARGET_ARCH_x86 # include "templateInterpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "templateInterpreter_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "templateInterpreter_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "templateInterpreter_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp index bd94bd02bc3..28ca437eb2c 100644 --- a/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp +++ b/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP @@ -89,6 +95,12 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { #ifdef TARGET_ARCH_x86 # include "templateInterpreterGenerator_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "templateInterpreterGenerator_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "templateInterpreterGenerator_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "templateInterpreterGenerator_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/templateTable.hpp b/hotspot/src/share/vm/interpreter/templateTable.hpp index 60d243c16a6..1b73822abd6 100644 --- a/hotspot/src/share/vm/interpreter/templateTable.hpp +++ b/hotspot/src/share/vm/interpreter/templateTable.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. 
These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_TEMPLATETABLE_HPP #define SHARE_VM_INTERPRETER_TEMPLATETABLE_HPP @@ -40,6 +46,10 @@ # include "interp_masm_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "interp_masm_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "interp_masm_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "interp_masm_loongarch_64.hpp" #endif #ifndef CC_INTERP @@ -367,6 +377,10 @@ class TemplateTable: AllStatic { # include "templateTable_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "templateTable_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "templateTable_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "templateTable_loongarch_64.hpp" #endif }; diff --git a/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp b/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp index 6d9ab39fdda..f4e9a4ca697 100644 --- a/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp +++ b/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp @@ -116,7 +116,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) return true; -#elif defined(SPARC) || defined(ARM) || defined(AARCH64) +#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) return false; #else #warning "Unconfigured platform" diff --git a/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp b/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp index 42a8b719cd8..f08f6ee13ab 100644 --- a/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp +++ b/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_JFR_WRITERS_JFRENCODERS_HPP #define SHARE_VM_JFR_WRITERS_JFRENCODERS_HPP @@ -46,6 +52,12 @@ #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif // // The Encoding policy prescribes a template diff --git a/hotspot/src/share/vm/memory/barrierSet.hpp b/hotspot/src/share/vm/memory/barrierSet.hpp index 13ff9b2738f..081b70744d1 100644 --- a/hotspot/src/share/vm/memory/barrierSet.hpp +++ b/hotspot/src/share/vm/memory/barrierSet.hpp @@ -27,6 +27,7 @@ #include "memory/memRegion.hpp" #include "oops/oopsHierarchy.hpp" +#include "runtime/orderAccess.hpp" // This class provides the interface between a barrier implementation and // the rest of the system. @@ -95,8 +96,16 @@ class BarrierSet: public CHeapObj { // Keep this private so as to catch violations at build time. 
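
platform_supports_unaligned_reads now reports false for MIPS and LoongArch, so JfrBigEndian takes the byte-by-byte path instead of dereferencing possibly misaligned pointers. A portable byte-assembling reader of the kind that path relies on; this helper is illustrative, not JFR's actual code:

    #include <cstdint>
    #include <cstdio>

    // Big-endian 32-bit read that never dereferences a misaligned pointer.
    static uint32_t read_be_u32(const unsigned char* p) {
      return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
             (uint32_t(p[2]) << 8)  |  uint32_t(p[3]);
    }

    int main() {
      unsigned char buf[] = { 0x12, 0x34, 0x56, 0x78 };
      std::printf("0x%08x\n", read_be_u32(buf));   // prints 0x12345678
      return 0;
    }
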
virtual void write_ref_field_pre_work( void* field, oop new_val) { guarantee(false, "Not needed"); }; protected: - virtual void write_ref_field_pre_work( oop* field, oop new_val) {}; - virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {}; + virtual void write_ref_field_pre_work( oop* field, oop new_val) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + }; + virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + }; public: // ...then the post-write version. @@ -132,9 +141,17 @@ class BarrierSet: public CHeapObj { // Below length is the # array elements being written virtual void write_ref_array_pre(oop* dst, int length, - bool dest_uninitialized = false) {} + bool dest_uninitialized = false) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + } virtual void write_ref_array_pre(narrowOop* dst, int length, - bool dest_uninitialized = false) {} + bool dest_uninitialized = false) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif +} // Below count is the # array elements being written, starting // at the address "start", which may not necessarily be HeapWord-aligned inline void write_ref_array(HeapWord* start, size_t count); diff --git a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp index 01e46888366..80bd1518737 100644 --- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp +++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp @@ -316,6 +316,9 @@ class CardTableModRefBS: public ModRefBarrierSet { inline void inline_write_ref_array(MemRegion mr) { dirty_MemRegion(mr); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } protected: void write_ref_array_work(MemRegion mr) { @@ -329,7 +332,11 @@ class CardTableModRefBS: public ModRefBarrierSet { // *** Card-table-barrier-specific things. - template inline void inline_write_ref_field_pre(T* field, oop newVal) {} + template inline void inline_write_ref_field_pre(T* field, oop newVal) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + } template inline void inline_write_ref_field(T* field, oop newVal, bool release) { jbyte* byte = byte_for((void*)field); @@ -339,6 +346,9 @@ class CardTableModRefBS: public ModRefBarrierSet { } else { *byte = dirty_card; } +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } // These are used by G1, when it uses the card table as a temporary data diff --git a/hotspot/src/share/vm/memory/cardTableRS.cpp b/hotspot/src/share/vm/memory/cardTableRS.cpp index fb33a708aec..da22acba47e 100644 --- a/hotspot/src/share/vm/memory/cardTableRS.cpp +++ b/hotspot/src/share/vm/memory/cardTableRS.cpp @@ -252,6 +252,9 @@ void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) { // cur_youngergen_and_prev_nonclean_card ==> no change. 
void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { jbyte* entry = ct_bs()->byte_for(field); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif do { jbyte entry_val = *entry; // We put this first because it's probably the most common case. @@ -266,7 +269,12 @@ void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { jbyte new_val = cur_youngergen_and_prev_nonclean_card; jbyte res = Atomic::cmpxchg(new_val, entry, entry_val); // Did the CAS succeed? - if (res == entry_val) return; + if (res == entry_val) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + return; + } // Otherwise, retry, to see the new value. continue; } else { diff --git a/hotspot/src/share/vm/memory/cardTableRS.hpp b/hotspot/src/share/vm/memory/cardTableRS.hpp index 25884feac8b..5d4e77f2693 100644 --- a/hotspot/src/share/vm/memory/cardTableRS.hpp +++ b/hotspot/src/share/vm/memory/cardTableRS.hpp @@ -121,7 +121,14 @@ class CardTableRS: public GenRemSet { void inline_write_ref_field_gc(void* field, oop new_val) { jbyte* byte = _ct_bs->byte_for(field); - *byte = youngergen_card; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + *byte = youngergen_card; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + } void write_ref_field_gc_work(void* field, oop new_val) { inline_write_ref_field_gc(field, new_val); diff --git a/hotspot/src/share/vm/memory/metaspace.cpp b/hotspot/src/share/vm/memory/metaspace.cpp index fb0564ac276..9cec7d43750 100644 --- a/hotspot/src/share/vm/memory/metaspace.cpp +++ b/hotspot/src/share/vm/memory/metaspace.cpp @@ -21,6 +21,13 @@ * questions. * */ + +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2021 Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "gc_interface/collectedHeap.hpp" #include "memory/allocation.hpp" @@ -3065,12 +3072,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // Don't use large pages for the class space. bool large_pages = false; -#ifndef AARCH64 +#if !defined(AARCH64) && !defined(MIPS64) && !defined(LOONGARCH) ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), _reserve_alignment, large_pages, requested_addr, 0); -#else // AARCH64 +#else // defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH) ReservedSpace metaspace_rs; // Our compressed klass pointers may fit nicely into the lower 32 @@ -3107,7 +3114,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a } } -#endif // AARCH64 +#endif // defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH) if (!metaspace_rs.is_reserved()) { #if INCLUDE_CDS diff --git a/hotspot/src/share/vm/oops/constantPool.hpp b/hotspot/src/share/vm/oops/constantPool.hpp index ec111df04eb..6c0607105c1 100644 --- a/hotspot/src/share/vm/oops/constantPool.hpp +++ b/hotspot/src/share/vm/oops/constantPool.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_OOPS_CONSTANTPOOLOOP_HPP #define SHARE_VM_OOPS_CONSTANTPOOLOOP_HPP @@ -50,6 +56,13 @@ #ifdef TARGET_ARCH_ppc # include "bytes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif + // A constantPool is an array containing class constants as described in the // class file. diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp index acef3348499..23fc0b99881 100644 --- a/hotspot/src/share/vm/oops/klass.hpp +++ b/hotspot/src/share/vm/oops/klass.hpp @@ -32,6 +32,9 @@ #include "oops/klassPS.hpp" #include "oops/metadata.hpp" #include "oops/oop.hpp" +#if defined MIPS || defined LOONGARCH +#include "runtime/orderAccess.hpp" +#endif #include "utilities/accessFlags.hpp" #include "utilities/macros.hpp" #if INCLUDE_ALL_GCS @@ -289,8 +292,18 @@ class Klass : public Metadata { // The Klasses are not placed in the Heap, so the Card Table or // the Mod Union Table can't be used to mark when klasses have modified oops. // The CT and MUT bits saves this information for the individual Klasses. - void record_modified_oops() { _modified_oops = 1; } - void clear_modified_oops() { _modified_oops = 0; } + void record_modified_oops() { + _modified_oops = 1; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + } + void clear_modified_oops() { + _modified_oops = 0; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + } bool has_modified_oops() { return _modified_oops == 1; } void accumulate_modified_oops() { if (has_modified_oops()) _accumulated_modified_oops = 1; } diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp index 0678c6b3fbb..1cb20e351f6 100644 --- a/hotspot/src/share/vm/oops/oop.hpp +++ b/hotspot/src/share/vm/oops/oop.hpp @@ -72,7 +72,13 @@ class oopDesc { markOop mark() const { return _mark; } markOop* mark_addr() const { return (markOop*) &_mark; } - void set_mark(volatile markOop m) { _mark = m; } + void set_mark(volatile markOop m) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) release_set_mark(m); + else +#endif + _mark = m; + } void release_set_mark(markOop m); markOop cas_set_mark(markOop new_mark, markOop old_mark); diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp index beec739d388..8660c1e3312 100644 --- a/hotspot/src/share/vm/oops/oop.inline.hpp +++ b/hotspot/src/share/vm/oops/oop.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP #define SHARE_VM_OOPS_OOP_INLINE_HPP @@ -60,6 +66,12 @@ #ifdef TARGET_ARCH_ppc # include "bytes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif // Implementation of all inlined member functions defined in oop.hpp // We need a separate file to avoid circular references diff --git a/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp b/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp index 8a4603944ea..b28bb99189a 100644 --- a/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp +++ b/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP #define SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP @@ -75,7 +81,7 @@ inline oop oopDesc::forward_to_atomic(oop p) { // forwarding pointer. oldMark = curMark; } - return forwardee(); + return (oop) oldMark->decode_pointer(); } #endif // SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP diff --git a/hotspot/src/share/vm/opto/buildOopMap.cpp b/hotspot/src/share/vm/opto/buildOopMap.cpp index 91642f1d7dd..5df185df04c 100644 --- a/hotspot/src/share/vm/opto/buildOopMap.cpp +++ b/hotspot/src/share/vm/opto/buildOopMap.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "compiler/oopMap.hpp" #include "opto/addnode.hpp" @@ -50,6 +56,12 @@ #ifdef TARGET_ARCH_ppc # include "vmreg_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vmreg_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vmreg_loongarch.inline.hpp" +#endif // The functions in this file builds OopMaps after all scheduling is done. // diff --git a/hotspot/src/share/vm/opto/bytecodeInfo.cpp b/hotspot/src/share/vm/opto/bytecodeInfo.cpp index 7fd615d35f2..ad472e87221 100644 --- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp +++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp @@ -361,9 +361,20 @@ bool InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, } else if (forced_inline()) { // Inlining was forced by CompilerOracle, ciReplay or annotation } else if (profile.count() == 0) { +#ifndef MIPS // don't inline unreached call sites set_msg("call site not reached"); return false; +#else + ciMethodBlocks* blocks = caller_method->get_method_blocks(); + // Check if the call site belongs to a start block: + // call sites in a start block must be reached before. + if (blocks->block_containing(0) != blocks->block_containing(jvms->bci())) { + // don't inline unreached call sites + set_msg("call site not reached"); + return false; + } +#endif } } diff --git a/hotspot/src/share/vm/opto/c2_globals.hpp b/hotspot/src/share/vm/opto/c2_globals.hpp index 82d2efef92c..d373b20456f 100644 --- a/hotspot/src/share/vm/opto/c2_globals.hpp +++ b/hotspot/src/share/vm/opto/c2_globals.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_OPTO_C2_GLOBALS_HPP #define SHARE_VM_OPTO_C2_GLOBALS_HPP @@ -35,6 +41,12 @@ #ifdef TARGET_ARCH_sparc # include "c2_globals_sparc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "c2_globals_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "c2_globals_loongarch.hpp" +#endif #ifdef TARGET_ARCH_arm # include "c2_globals_arm.hpp" #endif diff --git a/hotspot/src/share/vm/opto/c2compiler.cpp b/hotspot/src/share/vm/opto/c2compiler.cpp index 137f49600d9..f689d64a386 100644 --- a/hotspot/src/share/vm/opto/c2compiler.cpp +++ b/hotspot/src/share/vm/opto/c2compiler.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "opto/c2compiler.hpp" #include "opto/runtime.hpp" @@ -39,6 +45,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif // register information defined by ADLC diff --git a/hotspot/src/share/vm/opto/chaitin.hpp b/hotspot/src/share/vm/opto/chaitin.hpp index de6d443cd30..0b27dc9335e 100644 --- a/hotspot/src/share/vm/opto/chaitin.hpp +++ b/hotspot/src/share/vm/opto/chaitin.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_OPTO_CHAITIN_HPP #define SHARE_VM_OPTO_CHAITIN_HPP @@ -136,8 +142,12 @@ class LRG : public ResourceObj { // Number of registers this live range uses when it colors private: +#ifdef LOONGARCH64 + uint16_t _num_regs; +#else uint8 _num_regs; // 2 for Longs and Doubles, 1 for all else // except _num_regs is kill count for fat_proj +#endif public: int num_regs() const { return _num_regs; } void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } @@ -145,7 +155,11 @@ class LRG : public ResourceObj { private: // Number of physical registers this live range uses when it colors // Architecture and register-set dependent +#ifdef LOONGARCH64 + uint16_t _reg_pressure; +#else uint8 _reg_pressure; +#endif public: void set_reg_pressure(int i) { _reg_pressure = i; } int reg_pressure() const { return _reg_pressure; } diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp index ae22ba84d9f..9004dc0d727 100644 --- a/hotspot/src/share/vm/opto/compile.cpp +++ b/hotspot/src/share/vm/opto/compile.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" @@ -81,6 +87,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif // -------------------- Compile::mach_constant_base_node ----------------------- diff --git a/hotspot/src/share/vm/opto/compile.hpp b/hotspot/src/share/vm/opto/compile.hpp index b4f4cfefed3..d263ee2fc45 100644 --- a/hotspot/src/share/vm/opto/compile.hpp +++ b/hotspot/src/share/vm/opto/compile.hpp @@ -1025,7 +1025,7 @@ class Compile : public Phase { bool in_scratch_emit_size() const { return _in_scratch_emit_size; } enum ScratchBufferBlob { - MAX_inst_size = 1024, + MAX_inst_size = 1024 MIPS64_ONLY(* 2) LOONGARCH64_ONLY(*2), MAX_locs_size = 128, // number of relocInfo elements MAX_const_size = 128, MAX_stubs_size = 128 diff --git a/hotspot/src/share/vm/opto/gcm.cpp b/hotspot/src/share/vm/opto/gcm.cpp index f51484efb0f..12457b7c344 100644 --- a/hotspot/src/share/vm/opto/gcm.cpp +++ b/hotspot/src/share/vm/opto/gcm.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "libadt/vectset.hpp" #include "memory/allocation.inline.hpp" @@ -49,6 +55,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif diff --git a/hotspot/src/share/vm/opto/lcm.cpp b/hotspot/src/share/vm/opto/lcm.cpp index c6178a715b8..2d492568d96 100644 --- a/hotspot/src/share/vm/opto/lcm.cpp +++ b/hotspot/src/share/vm/opto/lcm.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "memory/allocation.inline.hpp" #include "opto/block.hpp" @@ -44,6 +50,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif // Optimization - Graph Style diff --git a/hotspot/src/share/vm/opto/locknode.hpp b/hotspot/src/share/vm/opto/locknode.hpp index b320f6bfb2f..4bfb0ff072f 100644 --- a/hotspot/src/share/vm/opto/locknode.hpp +++ b/hotspot/src/share/vm/opto/locknode.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_OPTO_LOCKNODE_HPP #define SHARE_VM_OPTO_LOCKNODE_HPP @@ -42,6 +48,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif //------------------------------BoxLockNode------------------------------------ diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp index 6660b4b467b..dec46861d3a 100644 --- a/hotspot/src/share/vm/opto/matcher.cpp +++ b/hotspot/src/share/vm/opto/matcher.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "memory/allocation.inline.hpp" #include "opto/addnode.hpp" @@ -52,6 +58,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif OptoReg::Name OptoReg::c_frame_pointer; diff --git a/hotspot/src/share/vm/opto/output.cpp b/hotspot/src/share/vm/opto/output.cpp index 6032b72a9b2..7fb4dea28e9 100644 --- a/hotspot/src/share/vm/opto/output.cpp +++ b/hotspot/src/share/vm/opto/output.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "asm/assembler.inline.hpp" #include "code/compiledIC.hpp" @@ -844,6 +850,27 @@ void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) { // Add the safepoint in the DebugInfoRecorder if( !mach->is_MachCall() ) { mcall = NULL; +#if defined(MIPS) || defined(LOONGARCH) + // safepoint_pc_offset should point to tha last instruction in safePoint. + // In X86 and sparc, their safePoints only contain one instruction. + // However, we should add current_offset with the size of safePoint in MIPS. + // 0x2d6ff22c: lw s2, 0x14(s2) + // last_pd->pc_offset()=308, pc_offset=304, bci=64 + // last_pd->pc_offset()=312, pc_offset=312, bci=64 + // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") + // + // ;; Safepoint: + // ---> pc_offset=304 + // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} + // ;*goto + // ; - java.util.Hashtable::get@64 (line 353) + // ---> last_pd(308) + // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto + // ; - java.util.Hashtable::get@64 (line 353) + // ; {poll} + // 0x2d6ff238: addiu s0, zero, 0x0 + safepoint_pc_offset += sfn->size(_regalloc) - 4; +#endif debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); } else { mcall = mach->as_MachCall(); @@ -1502,6 +1529,22 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { DEBUG_ONLY( uint instr_offset = cb->insts_size(); ) n->emit(*cb, _regalloc); current_offset = cb->insts_size(); +#if defined(MIPS) || defined(LOONGARCH) + if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { + // For MIPS, the first instruction of the previous node (usually a instruction sequence) sometime + // is not the instruction which access memory. 
adjust is needed. previous_offset points to the + // instruction which access memory. Instruction size is 4. cb->insts_size() and + // cb->insts()->end() are the location of current instruction. + int adjust = 4; + NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); + if (inst->is_sync()) { + // a sync may be the last instruction, see store_B_immI_enc_sync + adjust += 4; + inst = (NativeInstruction*) (cb->insts()->end() - 8); + } + previous_offset = current_offset - adjust; + } +#endif // Above we only verified that there is enough space in the instruction section. // However, the instruction may emit stubs that cause code buffer expansion. diff --git a/hotspot/src/share/vm/opto/output.hpp b/hotspot/src/share/vm/opto/output.hpp index ba728413632..37f954de9bf 100644 --- a/hotspot/src/share/vm/opto/output.hpp +++ b/hotspot/src/share/vm/opto/output.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_OPTO_OUTPUT_HPP #define SHARE_VM_OPTO_OUTPUT_HPP @@ -41,6 +47,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif class Arena; diff --git a/hotspot/src/share/vm/opto/regmask.cpp b/hotspot/src/share/vm/opto/regmask.cpp index 352ccfb9d95..9a656d03ee4 100644 --- a/hotspot/src/share/vm/opto/regmask.cpp +++ b/hotspot/src/share/vm/opto/regmask.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "opto/compile.hpp" #include "opto/regmask.hpp" @@ -39,6 +45,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif #define RM_SIZE _RM_SIZE /* a constant private to the class RegMask */ diff --git a/hotspot/src/share/vm/opto/regmask.hpp b/hotspot/src/share/vm/opto/regmask.hpp index 5ceebb3fb86..6d08b687316 100644 --- a/hotspot/src/share/vm/opto/regmask.hpp +++ b/hotspot/src/share/vm/opto/regmask.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_OPTO_REGMASK_HPP #define SHARE_VM_OPTO_REGMASK_HPP @@ -42,6 +48,10 @@ # include "adfiles/adGlobals_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/adGlobals_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/adGlobals_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/adGlobals_loongarch_64.hpp" #endif // Some fun naming (textual) substitutions: diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp index a43b37f2c56..f2bcafa2c52 100644 --- a/hotspot/src/share/vm/opto/runtime.cpp +++ b/hotspot/src/share/vm/opto/runtime.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -82,6 +88,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif diff --git a/hotspot/src/share/vm/opto/type.cpp b/hotspot/src/share/vm/opto/type.cpp index 58572f137db..299d48b12ab 100644 --- a/hotspot/src/share/vm/opto/type.cpp +++ b/hotspot/src/share/vm/opto/type.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "ci/ciMethodData.hpp" #include "ci/ciTypeFlow.hpp" @@ -68,6 +74,16 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { { Bad, T_ILLEGAL, "vectord:", false, Op_RegD, relocInfo::none }, // VectorD { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY +#elif defined(MIPS64) + { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD + { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY +#elif defined(LOONGARCH64) + { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD + { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY #elif defined(PPC64) { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD diff --git a/hotspot/src/share/vm/prims/jniCheck.cpp b/hotspot/src/share/vm/prims/jniCheck.cpp index 593ca8a1e34..82813b71fe2 100644 --- a/hotspot/src/share/vm/prims/jniCheck.cpp +++ b/hotspot/src/share/vm/prims/jniCheck.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -55,6 +61,12 @@ #ifdef TARGET_ARCH_ppc # include "jniTypes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "jniTypes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "jniTypes_loongarch.hpp" +#endif // Complain every extra number of unplanned local refs #define CHECK_JNI_LOCAL_REF_CAP_WARN_THRESHOLD 32 diff --git a/hotspot/src/share/vm/prims/jni_md.h b/hotspot/src/share/vm/prims/jni_md.h index 6209a664496..271715d4a29 100644 --- a/hotspot/src/share/vm/prims/jni_md.h +++ b/hotspot/src/share/vm/prims/jni_md.h @@ -22,6 +22,12 @@ * or visit www.oracle.com if you need additional information or have any * questions. */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + /* Switch to the correct jni_md.h file without reliance on -I options. */ #ifdef TARGET_ARCH_x86 @@ -42,6 +48,12 @@ #ifdef TARGET_ARCH_ppc # include "jni_ppc.h" #endif +#ifdef TARGET_ARCH_mips +# include "jni_mips.h" +#endif +#ifdef TARGET_ARCH_loongarch +# include "jni_loongarch.h" +#endif /* diff --git a/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp b/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp index ab31d0d91e5..0d8570b7649 100644 --- a/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp +++ b/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/symbolTable.hpp" #include "interpreter/bytecodeStream.hpp" @@ -46,6 +52,12 @@ #ifdef TARGET_ARCH_ppc # include "bytes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif // FIXME: add Deprecated attribute // FIXME: fix Synthetic attribute // FIXME: per Serguei, add error return handling for ConstantPool::copy_cpool_bytes() diff --git a/hotspot/src/share/vm/prims/methodHandles.hpp b/hotspot/src/share/vm/prims/methodHandles.hpp index db6e06180d1..841082859a4 100644 --- a/hotspot/src/share/vm/prims/methodHandles.hpp +++ b/hotspot/src/share/vm/prims/methodHandles.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_PRIMS_METHODHANDLES_HPP #define SHARE_VM_PRIMS_METHODHANDLES_HPP @@ -198,6 +204,13 @@ class MethodHandles: AllStatic { #ifdef TARGET_ARCH_ppc # include "methodHandles_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "methodHandles_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "methodHandles_loongarch.hpp" +#endif + // Tracing static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN; diff --git a/hotspot/src/share/vm/runtime/atomic.inline.hpp b/hotspot/src/share/vm/runtime/atomic.inline.hpp index 222f29cbf41..7c7c6edb27f 100644 --- a/hotspot/src/share/vm/runtime/atomic.inline.hpp +++ b/hotspot/src/share/vm/runtime/atomic.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP #define SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP @@ -31,6 +37,12 @@ #ifdef TARGET_OS_ARCH_linux_x86 # include "atomic_linux_x86.inline.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "atomic_linux_mips.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "atomic_linux_loongarch.inline.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_sparc # include "atomic_linux_sparc.inline.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/deoptimization.cpp b/hotspot/src/share/vm/runtime/deoptimization.cpp index f91afdc4165..36a924fd4fb 100644 --- a/hotspot/src/share/vm/runtime/deoptimization.cpp +++ b/hotspot/src/share/vm/runtime/deoptimization.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "code/debugInfoRec.hpp" @@ -68,6 +74,12 @@ #ifdef TARGET_ARCH_ppc # include "vmreg_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vmreg_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vmreg_loongarch.inline.hpp" +#endif #ifdef COMPILER2 #if defined AD_MD_HPP # include AD_MD_HPP @@ -84,6 +96,12 @@ #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" #endif +#ifdef TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" +#endif #endif // COMPILER2 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC diff --git a/hotspot/src/share/vm/runtime/dtraceJSDT.hpp b/hotspot/src/share/vm/runtime/dtraceJSDT.hpp index db568def348..490c5f5a4e9 100644 --- a/hotspot/src/share/vm/runtime/dtraceJSDT.hpp +++ b/hotspot/src/share/vm/runtime/dtraceJSDT.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_DTRACEJSDT_HPP #define SHARE_VM_RUNTIME_DTRACEJSDT_HPP @@ -44,6 +50,12 @@ #ifdef TARGET_ARCH_ppc # include "nativeInst_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +#endif class RegisteredProbes; typedef jlong OpaqueProbes; diff --git a/hotspot/src/share/vm/runtime/frame.cpp b/hotspot/src/share/vm/runtime/frame.cpp index 338b7ad3a7b..5a161133baf 100644 --- a/hotspot/src/share/vm/runtime/frame.cpp +++ b/hotspot/src/share/vm/runtime/frame.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "compiler/abstractCompiler.hpp" #include "compiler/disassembler.hpp" @@ -64,6 +70,13 @@ #ifdef TARGET_ARCH_ppc # include "nativeInst_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +#endif + PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC diff --git a/hotspot/src/share/vm/runtime/frame.hpp b/hotspot/src/share/vm/runtime/frame.hpp index 2d80ecc2085..4a9e6edb541 100644 --- a/hotspot/src/share/vm/runtime/frame.hpp +++ b/hotspot/src/share/vm/runtime/frame.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_FRAME_HPP #define SHARE_VM_RUNTIME_FRAME_HPP @@ -45,6 +51,10 @@ # include "adfiles/adGlobals_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/adGlobals_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/adGlobals_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/adGlobals_loongarch_64.hpp" #endif #endif // COMPILER2 #ifdef TARGET_ARCH_zero @@ -489,6 +499,12 @@ class frame VALUE_OBJ_CLASS_SPEC { #ifdef TARGET_ARCH_x86 # include "frame_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "frame_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "frame_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "frame_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/frame.inline.hpp b/hotspot/src/share/vm/runtime/frame.inline.hpp index 710b82306ab..704cc8df8f2 100644 --- a/hotspot/src/share/vm/runtime/frame.inline.hpp +++ b/hotspot/src/share/vm/runtime/frame.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_FRAME_INLINE_HPP #define SHARE_VM_RUNTIME_FRAME_INLINE_HPP @@ -49,6 +55,12 @@ #ifdef TARGET_ARCH_ppc # include "jniTypes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "jniTypes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "jniTypes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_zero # include "entryFrame_zero.hpp" # include "fakeStubFrame_zero.hpp" @@ -115,6 +127,12 @@ inline oop* frame::interpreter_frame_temp_oop_addr() const { #ifdef TARGET_ARCH_ppc # include "frame_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "frame_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "frame_loongarch.inline.hpp" +#endif #endif // SHARE_VM_RUNTIME_FRAME_INLINE_HPP diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 23ce8af5696..f36137aabfb 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -55,6 +55,12 @@ #ifdef TARGET_ARCH_ppc # include "globals_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "globals_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "globals_loongarch.hpp" +#endif #ifdef TARGET_OS_FAMILY_linux # include "globals_linux.hpp" #endif @@ -79,6 +85,12 @@ #ifdef TARGET_OS_ARCH_linux_sparc # include "globals_linux_sparc.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "globals_linux_mips.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "globals_linux_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_zero # include "globals_linux_zero.hpp" #endif @@ -116,6 +128,12 @@ #ifdef TARGET_ARCH_sparc # include "c1_globals_sparc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "c1_globals_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "c1_globals_loongarch.hpp" +#endif #ifdef TARGET_ARCH_arm # include "c1_globals_arm.hpp" #endif @@ -148,6 +166,12 @@ #ifdef TARGET_ARCH_sparc # include "c2_globals_sparc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "c2_globals_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "c2_globals_loongarch.hpp" +#endif #ifdef TARGET_ARCH_arm # include "c2_globals_arm.hpp" #endif @@ -3209,7 +3233,7 @@ class CommandLineFlags { product(uintx, InitialHeapSize, 0, \ "Initial heap size (in bytes); zero means use ergonomics") \ \ - product(uintx, MaxHeapSize, ScaleForWordSize(96*M), \ + product(uintx, MaxHeapSize, ScaleForWordSize(MIPS64_ONLY(1500) NOT_MIPS64(96) *M), \ "Maximum heap size (in bytes)") \ \ product(uintx, OldSize, ScaleForWordSize(4*M), \ diff --git a/hotspot/src/share/vm/runtime/icache.hpp b/hotspot/src/share/vm/runtime/icache.hpp index ba81a06ff59..9c0cfdb7d70 100644 --- a/hotspot/src/share/vm/runtime/icache.hpp +++ b/hotspot/src/share/vm/runtime/icache.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_ICACHE_HPP #define SHARE_VM_RUNTIME_ICACHE_HPP @@ -86,7 +92,12 @@ class AbstractICache : AllStatic { #ifdef TARGET_ARCH_ppc # include "icache_ppc.hpp" #endif - +#ifdef TARGET_ARCH_mips +# include "icache_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "icache_loongarch.hpp" +#endif class ICacheStubGenerator : public StubCodeGenerator { diff --git a/hotspot/src/share/vm/runtime/java.cpp b/hotspot/src/share/vm/runtime/java.cpp index 0a263b017cf..9ba0decaae7 100644 --- a/hotspot/src/share/vm/runtime/java.cpp +++ b/hotspot/src/share/vm/runtime/java.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/classLoader.hpp" #include "classfile/symbolTable.hpp" @@ -84,6 +90,12 @@ #ifdef TARGET_ARCH_ppc # include "vm_version_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vm_version_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vm_version_loongarch.hpp" +#endif #if INCLUDE_ALL_GCS #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp" #include "gc_implementation/parallelScavenge/psScavenge.hpp" diff --git a/hotspot/src/share/vm/runtime/javaCalls.hpp b/hotspot/src/share/vm/runtime/javaCalls.hpp index 6126bbe75ef..1747e2b2ee7 100644 --- a/hotspot/src/share/vm/runtime/javaCalls.hpp +++ b/hotspot/src/share/vm/runtime/javaCalls.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_JAVACALLS_HPP #define SHARE_VM_RUNTIME_JAVACALLS_HPP @@ -49,6 +55,12 @@ #ifdef TARGET_ARCH_ppc # include "jniTypes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "jniTypes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "jniTypes_loongarch.hpp" +#endif // A JavaCallWrapper is constructed before each JavaCall and destructed after the call. // Its purpose is to allocate/deallocate a new handle block and to save/restore the last diff --git a/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp b/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp index 129a01e293f..c2b1b2e6c3b 100644 --- a/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp +++ b/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_JAVAFRAMEANCHOR_HPP #define SHARE_VM_RUNTIME_JAVAFRAMEANCHOR_HPP @@ -80,6 +86,12 @@ friend class JavaCallWrapper; #ifdef TARGET_ARCH_x86 # include "javaFrameAnchor_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "javaFrameAnchor_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "javaFrameAnchor_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "javaFrameAnchor_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/os.cpp b/hotspot/src/share/vm/runtime/os.cpp index 96eed036705..28c78409e7e 100644 --- a/hotspot/src/share/vm/runtime/os.cpp +++ b/hotspot/src/share/vm/runtime/os.cpp @@ -1122,7 +1122,8 @@ bool os::is_first_C_frame(frame* fr) { uintptr_t old_fp = (uintptr_t)fr->link(); if ((old_fp & fp_align_mask) != 0) return true; - if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp) return true; + // The check for old_fp and ufp is harmful on MIPS due to its special ABI. + if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_MIPS64(|| old_fp == ufp)) return true; // stack grows downwards; if old_fp is below current fp or if the stack // frame is too large, either the stack is corrupted or fp is not saved diff --git a/hotspot/src/share/vm/runtime/os.hpp b/hotspot/src/share/vm/runtime/os.hpp index 836c231b03e..0ca6e645982 100644 --- a/hotspot/src/share/vm/runtime/os.hpp +++ b/hotspot/src/share/vm/runtime/os.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_OS_HPP #define SHARE_VM_RUNTIME_OS_HPP @@ -857,6 +863,12 @@ class os: AllStatic { #ifdef TARGET_OS_ARCH_linux_x86 # include "os_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "os_linux_mips.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "os_linux_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_aarch64 # include "os_linux_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/prefetch.inline.hpp b/hotspot/src/share/vm/runtime/prefetch.inline.hpp index f4e30de34d9..fec16f842c9 100644 --- a/hotspot/src/share/vm/runtime/prefetch.inline.hpp +++ b/hotspot/src/share/vm/runtime/prefetch.inline.hpp @@ -46,6 +46,12 @@ #ifdef TARGET_OS_ARCH_linux_ppc # include "prefetch_linux_ppc.inline.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "prefetch_linux_mips.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "prefetch_linux_loongarch.inline.hpp" +#endif // Solaris #ifdef TARGET_OS_ARCH_solaris_x86 diff --git a/hotspot/src/share/vm/runtime/registerMap.hpp b/hotspot/src/share/vm/runtime/registerMap.hpp index 67ef212d659..1e26dfcba4e 100644 --- a/hotspot/src/share/vm/runtime/registerMap.hpp +++ b/hotspot/src/share/vm/runtime/registerMap.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_REGISTERMAP_HPP #define SHARE_VM_RUNTIME_REGISTERMAP_HPP @@ -45,6 +51,12 @@ #ifdef TARGET_ARCH_ppc # include "register_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "register_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "register_loongarch.hpp" +#endif class JavaThread; @@ -156,6 +168,12 @@ class RegisterMap : public StackObj { #ifdef TARGET_ARCH_ppc # include "registerMap_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "registerMap_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "registerMap_loongarch.hpp" +#endif }; diff --git a/hotspot/src/share/vm/runtime/relocator.hpp b/hotspot/src/share/vm/runtime/relocator.hpp index bb19c75fe65..53f3c9f6bdb 100644 --- a/hotspot/src/share/vm/runtime/relocator.hpp +++ b/hotspot/src/share/vm/runtime/relocator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_RELOCATOR_HPP #define SHARE_VM_RUNTIME_RELOCATOR_HPP @@ -45,6 +51,12 @@ #ifdef TARGET_ARCH_ppc # include "bytes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif // This code has been converted from the 1.1E java virtual machine // Thanks to the JavaTopics group for using the code diff --git a/hotspot/src/share/vm/runtime/safepoint.cpp b/hotspot/src/share/vm/runtime/safepoint.cpp index 440617c8026..be0e4dd13c6 100644 --- a/hotspot/src/share/vm/runtime/safepoint.cpp +++ b/hotspot/src/share/vm/runtime/safepoint.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/symbolTable.hpp" #include "classfile/systemDictionary.hpp" @@ -78,6 +84,14 @@ # include "nativeInst_ppc.hpp" # include "vmreg_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +# include "vmreg_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +# include "vmreg_loongarch.inline.hpp" +#endif #if INCLUDE_ALL_GCS #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp" #include "gc_implementation/shared/suspendibleThreadSet.hpp" diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp index 5f540247f9b..abcd6066b9c 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -82,6 +88,15 @@ # include "nativeInst_ppc.hpp" # include "vmreg_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +# include "vmreg_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +# include "vmreg_loongarch.inline.hpp" +#endif + #ifdef COMPILER1 #include "c1/c1_Runtime1.hpp" #endif @@ -220,7 +235,6 @@ void SharedRuntime::print_ic_miss_histogram() { } } #endif // PRODUCT - #if INCLUDE_ALL_GCS // G1 write-barrier pre: executed before a pointer store. diff --git a/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp b/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp index 37880d8a5c5..3987880b16b 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020, These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "prims/jni.h" #include "runtime/interfaceSupport.hpp" @@ -534,6 +540,15 @@ static SAFEBUF int __ieee754_rem_pio2(double x, double *y) { * then 3 2 * sin(x) = x + (S1*x + (x *(r-y/2)+y)) */ +#if defined(MIPS) || defined(LOONGARCH) +// TODO: LA +#undef S1 +#undef S2 +#undef S3 +#undef S4 +#undef S5 +#undef S6 +#endif static const double S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ diff --git a/hotspot/src/share/vm/runtime/stackValueCollection.cpp b/hotspot/src/share/vm/runtime/stackValueCollection.cpp index 87747683118..fe81c1bfd81 100644 --- a/hotspot/src/share/vm/runtime/stackValueCollection.cpp +++ b/hotspot/src/share/vm/runtime/stackValueCollection.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "runtime/stackValueCollection.hpp" #ifdef TARGET_ARCH_x86 @@ -42,6 +48,12 @@ #ifdef TARGET_ARCH_ppc # include "jniTypes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "jniTypes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "jniTypes_loongarch.hpp" +#endif PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC diff --git a/hotspot/src/share/vm/runtime/statSampler.cpp b/hotspot/src/share/vm/runtime/statSampler.cpp index 41f469622f0..3b430890620 100644 --- a/hotspot/src/share/vm/runtime/statSampler.cpp +++ b/hotspot/src/share/vm/runtime/statSampler.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020 Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -51,6 +57,12 @@ #ifdef TARGET_ARCH_ppc # include "vm_version_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vm_version_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vm_version_loongarch.hpp" +#endif // -------------------------------------------------------- // StatSamplerTask diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp index e18b9127df9..9bf933762a4 100644 --- a/hotspot/src/share/vm/runtime/stubRoutines.hpp +++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_STUBROUTINES_HPP #define SHARE_VM_RUNTIME_STUBROUTINES_HPP @@ -49,6 +55,12 @@ #ifdef TARGET_ARCH_ppc # include "nativeInst_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +#endif // StubRoutines provides entry points to assembly routines used by // compiled code and the run-time system. Platform-specific entry @@ -116,6 +128,10 @@ class StubRoutines: AllStatic { # include "stubRoutines_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "stubRoutines_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "stubRoutines_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "stubRoutines_loongarch_64.hpp" #endif static jint _verify_oop_count; diff --git a/hotspot/src/share/vm/runtime/thread.cpp b/hotspot/src/share/vm/runtime/thread.cpp index e6586c40cbc..3db678ff482 100644 --- a/hotspot/src/share/vm/runtime/thread.cpp +++ b/hotspot/src/share/vm/runtime/thread.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/classLoader.hpp" #include "classfile/javaClasses.hpp" diff --git a/hotspot/src/share/vm/runtime/thread.hpp b/hotspot/src/share/vm/runtime/thread.hpp index 1c19ab72909..aa69217eeff 100644 --- a/hotspot/src/share/vm/runtime/thread.hpp +++ b/hotspot/src/share/vm/runtime/thread.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_THREAD_HPP #define SHARE_VM_RUNTIME_THREAD_HPP @@ -1711,6 +1717,12 @@ class JavaThread: public Thread { #ifdef TARGET_OS_ARCH_linux_x86 # include "thread_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "thread_linux_mips.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "thread_linux_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_aarch64 # include "thread_linux_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/threadLocalStorage.hpp b/hotspot/src/share/vm/runtime/threadLocalStorage.hpp index 58c1afc810e..0938b2eddae 100644 --- a/hotspot/src/share/vm/runtime/threadLocalStorage.hpp +++ b/hotspot/src/share/vm/runtime/threadLocalStorage.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_THREADLOCALSTORAGE_HPP #define SHARE_VM_RUNTIME_THREADLOCALSTORAGE_HPP @@ -51,6 +57,12 @@ class ThreadLocalStorage : AllStatic { #ifdef TARGET_OS_ARCH_linux_x86 # include "threadLS_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "threadLS_linux_mips.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "threadLS_linux_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_aarch64 # include "threadLS_linux_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/virtualspace.cpp b/hotspot/src/share/vm/runtime/virtualspace.cpp index 66392b75f13..5ced38d8389 100644 --- a/hotspot/src/share/vm/runtime/virtualspace.cpp +++ b/hotspot/src/share/vm/runtime/virtualspace.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -147,6 +148,15 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large, bool special = large && !os::can_commit_large_page_memory(); char* base = NULL; +#if defined MIPS && !defined ZERO + size_t opt_reg_addr = 5 * os::Linux::page_size(); + static int code_cache_init_flag = 1; + if (UseCodeCacheAllocOpt && code_cache_init_flag && executable) { + code_cache_init_flag = 0; + requested_address = (char*) opt_reg_addr; + } +#endif + if (requested_address != 0) { requested_address -= noaccess_prefix; // adjust requested address assert(requested_address != NULL, "huge noaccess prefix?"); @@ -193,6 +203,12 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large, if (failed_to_reserve_as_requested(base, requested_address, size, false)) { // OS ignored requested address. Try different address. base = NULL; +#if defined MIPS && !defined ZERO + if (UseCodeCacheAllocOpt && requested_address == (char*) opt_reg_addr) { + requested_address = NULL; + base = os::reserve_memory(size, NULL, alignment); + } +#endif } } else { base = os::reserve_memory(size, NULL, alignment); diff --git a/hotspot/src/share/vm/runtime/vmStructs.cpp b/hotspot/src/share/vm/runtime/vmStructs.cpp index 32e3921b2b5..c6cc4c4329f 100644 --- a/hotspot/src/share/vm/runtime/vmStructs.cpp +++ b/hotspot/src/share/vm/runtime/vmStructs.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. 
These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/dictionary.hpp" #include "classfile/javaClasses.hpp" @@ -122,6 +128,12 @@ #ifdef TARGET_ARCH_ppc # include "vmStructs_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vmStructs_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vmStructs_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_x86 # include "vmStructs_linux_x86.hpp" #endif @@ -149,6 +161,12 @@ #ifdef TARGET_OS_ARCH_linux_ppc # include "vmStructs_linux_ppc.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "vmStructs_linux_mips.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "vmStructs_linux_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_aix_ppc # include "vmStructs_aix_ppc.hpp" #endif @@ -208,6 +226,10 @@ # include "adfiles/adGlobals_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/adGlobals_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/adGlobals_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/adGlobals_loongarch_64.hpp" #endif #endif // COMPILER2 diff --git a/hotspot/src/share/vm/runtime/vm_version.cpp b/hotspot/src/share/vm/runtime/vm_version.cpp index 91f9c70f5a3..d8dcfcfccad 100644 --- a/hotspot/src/share/vm/runtime/vm_version.cpp +++ b/hotspot/src/share/vm/runtime/vm_version.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "memory/universe.hpp" #include "oops/oop.inline.hpp" @@ -44,6 +50,12 @@ #ifdef TARGET_ARCH_ppc # include "vm_version_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vm_version_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vm_version_loongarch.hpp" +#endif const char* Abstract_VM_Version::_s_vm_release = Abstract_VM_Version::vm_release(); const char* Abstract_VM_Version::_s_internal_vm_info_string = Abstract_VM_Version::internal_vm_info_string(); @@ -193,6 +205,14 @@ const char* Abstract_VM_Version::jre_release_version() { #else #define CPU "ppc64" #endif +#elif defined(MIPS64) +#if defined(VM_LITTLE_ENDIAN) +#define CPU "mips64el" +#else +#define CPU "mips64" +#endif +#elif defined(LOONGARCH64) +#define CPU "loongarch64" #else #define CPU IA32_ONLY("x86") \ IA64_ONLY("ia64") \ diff --git a/hotspot/src/share/vm/utilities/copy.hpp b/hotspot/src/share/vm/utilities/copy.hpp index c1d82c70838..73b858b86e0 100644 --- a/hotspot/src/share/vm/utilities/copy.hpp +++ b/hotspot/src/share/vm/utilities/copy.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2023. These + * modifications are Copyright (c) 2015, 2023, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_UTILITIES_COPY_HPP #define SHARE_VM_UTILITIES_COPY_HPP @@ -350,6 +356,13 @@ class Copy : AllStatic { #ifdef TARGET_ARCH_ppc # include "copy_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "copy_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "copy_loongarch.hpp" +#endif + }; diff --git a/hotspot/src/share/vm/utilities/debug.cpp b/hotspot/src/share/vm/utilities/debug.cpp index 58a32a2b834..1026585f84c 100644 --- a/hotspot/src/share/vm/utilities/debug.cpp +++ b/hotspot/src/share/vm/utilities/debug.cpp @@ -690,6 +690,7 @@ void help() { tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or"); tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or"); tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or"); + tty->print_cr(" pns($sp, $s8, $pc) on Linux/mips or"); tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC"); tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()"); tty->print_cr(" - in dbx do 'frame 1' before calling pns()"); diff --git a/hotspot/src/share/vm/utilities/globalDefinitions.hpp b/hotspot/src/share/vm/utilities/globalDefinitions.hpp index 81866b84099..61fc0c48a24 100644 --- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp +++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_UTILITIES_GLOBALDEFINITIONS_HPP #define SHARE_VM_UTILITIES_GLOBALDEFINITIONS_HPP @@ -455,6 +461,12 @@ enum RTMState { #ifdef TARGET_ARCH_ppc # include "globalDefinitions_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "globalDefinitions_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "globalDefinitions_loongarch.hpp" +#endif /* * If a platform does not support native stack walking diff --git a/hotspot/src/share/vm/utilities/macros.hpp b/hotspot/src/share/vm/utilities/macros.hpp index 599e1074de5..41ef06e27fe 100644 --- a/hotspot/src/share/vm/utilities/macros.hpp +++ b/hotspot/src/share/vm/utilities/macros.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_UTILITIES_MACROS_HPP #define SHARE_VM_UTILITIES_MACROS_HPP @@ -373,6 +379,30 @@ #define NOT_SPARC(code) code #endif +#ifdef MIPS64 +#ifndef MIPS +#define MIPS +#endif +#define MIPS64_ONLY(code) code +#define NOT_MIPS64(code) +#else +#undef MIPS +#define MIPS64_ONLY(code) +#define NOT_MIPS64(code) code +#endif + +#ifdef LOONGARCH64 +#ifndef LOONGARCH +#define LOONGARCH +#endif +#define LOONGARCH64_ONLY(code) code +#define NOT_LOONGARCH64(code) +#else +#undef LOONGARCH +#define LOONGARCH64_ONLY(code) +#define NOT_LOONGARCH64(code) code +#endif + #if defined(PPC32) || defined(PPC64) #ifndef PPC #define PPC diff --git a/hotspot/src/share/vm/utilities/taskqueue.hpp b/hotspot/src/share/vm/utilities/taskqueue.hpp index bc06caccb46..46be35a3256 100644 --- a/hotspot/src/share/vm/utilities/taskqueue.hpp +++ b/hotspot/src/share/vm/utilities/taskqueue.hpp @@ -121,11 +121,22 @@ class TaskQueueSuper: public CHeapObj { Age(const Age& age) { _data = age._data; } Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } +#if !defined MIPS && !defined LOONGARCH Age get() const volatile { return _data; } void set(Age age) volatile { _data = age._data; } idx_t top() const volatile { return _fields._top; } idx_t tag() const volatile { return _fields._tag; } +#else + Age get() const volatile { + size_t res = OrderAccess::load_ptr_acquire((volatile intptr_t*) &_data); + return *(Age*)(&res); + } + void set(Age age) volatile { OrderAccess::release_store_ptr((volatile intptr_t*) &_data, *(size_t*)(&age._data)); } + + idx_t top() const volatile { return OrderAccess::load_acquire((volatile idx_t*) &(_fields._top)); } + idx_t tag() const volatile { return OrderAccess::load_acquire((volatile idx_t*) &(_fields._tag)); } +#endif // Increment top; if it wraps, increment tag also. void increment() { @@ -195,23 +206,50 @@ class TaskQueueSuper: public CHeapObj { public: TaskQueueSuper() : _bottom(0), _age() {} +#if defined MIPS || defined LOONGARCH + inline uint get_bottom() const { + return OrderAccess::load_acquire((volatile juint*)&_bottom); + } + + inline void set_bottom(uint new_bottom) { + OrderAccess::release_store(&_bottom, new_bottom); + } +#endif // Return true if the TaskQueue contains/does not contain any tasks. - bool peek() const { return _bottom != _age.top(); } + bool peek() const { +#if defined MIPS || defined LOONGARCH + return get_bottom() != _age.top(); +#else + return _bottom != _age.top(); +#endif + } bool is_empty() const { return size() == 0; } // Return an estimate of the number of elements in the queue. // The "careful" version admits the possibility of pop_local/pop_global // races. 
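// The MIPS/LOONGARCH guards in this file route every access to _bottom and the
// Age word through OrderAccess acquire/release primitives, because both CPUs are
// more weakly ordered than the TSO-like targets the plain volatile accesses were
// written for. A standalone sketch of the same publish/consume pattern, using
// std::atomic purely for illustration (MiniQueue and its members are hypothetical
// and not part of this patch; a single producer is assumed):
//
//   #include <atomic>
//   #include <cstddef>
//
//   struct MiniQueue {
//     int slots[16];
//     std::atomic<std::size_t> bottom{0};
//
//     void push(int v) {                                        // producer thread only
//       std::size_t b = bottom.load(std::memory_order_relaxed);
//       slots[b % 16] = v;                                      // write the element first
//       bottom.store(b + 1, std::memory_order_release);         // then publish the new index
//     }
//
//     bool peek_last(int* out) {                                // any consumer thread
//       std::size_t b = bottom.load(std::memory_order_acquire); // pairs with the release store
//       if (b == 0) return false;
//       *out = slots[(b - 1) % 16];                             // element is guaranteed visible
//       return true;
//     }
//   };
//
//   int main() { MiniQueue q; q.push(42); int v; return q.peek_last(&v) && v == 42 ? 0 : 1; }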
uint size() const { +#if defined MIPS || defined LOONGARCH + return size(get_bottom(), _age.top()); +#else return size(_bottom, _age.top()); +#endif } uint dirty_size() const { +#if defined MIPS || defined LOONGARCH + return dirty_size(get_bottom(), _age.top()); +#else return dirty_size(_bottom, _age.top()); +#endif } void set_empty() { +#if defined MIPS || defined LOONGARCH + set_bottom(0); +#else _bottom = 0; +#endif _age.set(0); } @@ -263,7 +301,9 @@ class GenericTaskQueue: public TaskQueueSuper { typedef typename TaskQueueSuper::Age Age; typedef typename TaskQueueSuper::idx_t idx_t; +#if !defined MIPS && !defined LOONGARCH using TaskQueueSuper::_bottom; +#endif using TaskQueueSuper::_age; using TaskQueueSuper::increment_index; using TaskQueueSuper::decrement_index; @@ -327,7 +367,11 @@ template void GenericTaskQueue::oops_do(OopClosure* f) { // tty->print_cr("START OopTaskQueue::oops_do"); uint iters = size(); +#if defined MIPS || defined LOONGARCH + uint index = this->get_bottom(); +#else uint index = _bottom; +#endif for (uint i = 0; i < iters; ++i) { index = decrement_index(index); // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, @@ -345,14 +389,22 @@ template bool GenericTaskQueue::push_slow(E t, uint dirty_n_elems) { if (dirty_n_elems == N - 1) { // Actually means 0, so do the push. +#if defined MIPS || defined LOONGARCH + uint localBot = this->get_bottom(); +#else uint localBot = _bottom; +#endif // g++ complains if the volatile result of the assignment is // unused, so we cast the volatile away. We cannot cast directly // to void, because gcc treats that as not using the result of the // assignment. However, casting to E& means that we trigger an // unused-value warning. So, we cast the E& to void. (void)const_cast(_elems[localBot] = t); +#if defined MIPS || defined LOONGARCH + this->set_bottom(increment_index(localBot)); +#else OrderAccess::release_store(&_bottom, increment_index(localBot)); +#endif TASKQUEUE_STATS_ONLY(stats.record_push()); return true; } @@ -407,7 +459,11 @@ bool GenericTaskQueue::pop_global(volatile E& t) { #if !(defined SPARC || defined IA32 || defined AMD64) OrderAccess::fence(); #endif +#if defined MIPS || defined LOONGARCH + uint localBot = this->get_bottom(); +#else uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom); +#endif uint n_elems = size(localBot, oldAge.top()); if (n_elems == 0) { return false; @@ -662,7 +718,11 @@ class ParallelTaskTerminator: public StackObj { template inline bool GenericTaskQueue::push(E t) { +#if defined MIPS || defined LOONGARCH + uint localBot = this->get_bottom(); +#else uint localBot = _bottom; +#endif assert(localBot < N, "_bottom out of range."); idx_t top = _age.top(); uint dirty_n_elems = dirty_size(localBot, top); @@ -674,7 +734,11 @@ GenericTaskQueue::push(E t) { // assignment. However, casting to E& means that we trigger an // unused-value warning. So, we cast the E& to void. (void) const_cast(_elems[localBot] = t); +#if defined MIPS || defined LOONGARCH + this->set_bottom(increment_index(localBot)); +#else OrderAccess::release_store(&_bottom, increment_index(localBot)); +#endif TASKQUEUE_STATS_ONLY(stats.record_push()); return true; } else { @@ -684,7 +748,11 @@ GenericTaskQueue::push(E t) { template inline bool GenericTaskQueue::pop_local(volatile E& t) { +#if defined MIPS || defined LOONGARCH + uint localBot = this->get_bottom(); +#else uint localBot = _bottom; +#endif // This value cannot be N-1. That can only occur as a result of // the assignment to bottom in this method. 
If it does, this method // resets the size to 0 before the next call (which is sequential, @@ -693,7 +761,11 @@ GenericTaskQueue::pop_local(volatile E& t) { assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); if (dirty_n_elems == 0) return false; localBot = decrement_index(localBot); +#if defined MIPS || defined LOONGARCH + this->set_bottom(localBot); +#else _bottom = localBot; +#endif // This is necessary to prevent any read below from being reordered // before the store just above. OrderAccess::fence(); diff --git a/hotspot/src/share/vm/utilities/vmError.cpp b/hotspot/src/share/vm/utilities/vmError.cpp index fa7a32508e2..7098a98a9f7 100644 --- a/hotspot/src/share/vm/utilities/vmError.cpp +++ b/hotspot/src/share/vm/utilities/vmError.cpp @@ -22,6 +22,13 @@ * */ +/* + * This file has been modified by Loongson Technology in 2018. These + * modifications are Copyright (c) 2018 Loongson Technology, and are made + * available on the same license terms set forth above. + * +*/ + #include #include "precompiled.hpp" #include "compiler/compileBroker.hpp" @@ -488,7 +495,12 @@ void VMError::report(outputStream* st) { JDK_Version::runtime_name() : ""; const char* runtime_version = JDK_Version::runtime_version() != NULL ? JDK_Version::runtime_version() : ""; - st->print_cr("# JRE version: %s (%s) (build %s)", runtime_name, buf, runtime_version); +#ifdef LOONGSON_RUNTIME_NAME + const char* loongson_runtime_name_and_version = LOONGSON_RUNTIME_NAME; +#else + const char* loongson_runtime_name_and_version = ""; +#endif + st->print_cr("# JRE version: %s (%s) (build %s) (%s)", runtime_name, buf, runtime_version, loongson_runtime_name_and_version); st->print_cr("# Java VM: %s (%s %s %s %s)", Abstract_VM_Version::vm_name(), Abstract_VM_Version::vm_release(), diff --git a/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh b/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh index fcf1d04b6aa..5b8e7dcce58 100644 --- a/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh +++ b/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh @@ -24,6 +24,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2023. These +# modifications are Copyright (c) 2023, Loongson Technology, and are made +# available on the same license terms set forth above. +# + ## @test Test8167409.sh ## @bug 8167409 ## @summary Invalid value passed to critical JNI function @@ -68,6 +74,18 @@ if [ $VM_CPU = "aarch64" ]; then exit 0; fi +# CriticalJNINatives is not supported for loongarch64 +if [ $VM_CPU = "loongarch64" ]; then + echo "Test Passed" + exit 0; +fi + +# CriticalJNINatives is not supported for mips64 +if [ $VM_CPU = "mips64" -o $VM_CPU = "mips64el" ]; then + echo "Test Passed" + exit 0; +fi + THIS_DIR=. 
cp ${TESTSRC}${FS}*.java ${THIS_DIR} diff --git a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java index fa9a6f208b3..885957cf1c2 100644 --- a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +++ b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java @@ -34,11 +34,12 @@ public class GenericTestCaseForOtherCPU extends SHAOptionsBase.TestCase { public GenericTestCaseForOtherCPU(String optionName) { - // Execute the test case on any CPU except SPARC and X86 + // Execute the test case on any CPU except SPARC, LoongArch64 and X86 super(optionName, new NotPredicate(new OrPredicate(Platform::isSparc, new OrPredicate(Platform::isAArch64, new OrPredicate(Platform::isPPC, - new OrPredicate(Platform::isX64, Platform::isX86)))))); + new OrPredicate(Platform::isLoongArch64, + new OrPredicate(Platform::isX64, Platform::isX86))))))); } @Override diff --git a/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java index dc8c3984081..2427b2bf7b9 100644 --- a/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +++ b/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java @@ -62,18 +62,24 @@ public class IntrinsicPredicates { = new OrPredicate( new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), + // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. + new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, + null), new CPUSpecificPredicate("aarch64", new String[] { "sha1" }, - null)); + null))); public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE = new OrPredicate(new CPUSpecificPredicate("aarch64", new String[] { "sha256" }, null), new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. 
+ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, + null), new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, - null)))); + null))))); public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE = new OrPredicate( diff --git a/hotspot/test/runtime/6929067/Test6929067.sh b/hotspot/test/runtime/6929067/Test6929067.sh index 2bbb3401ce5..1a5482e6451 100644 --- a/hotspot/test/runtime/6929067/Test6929067.sh +++ b/hotspot/test/runtime/6929067/Test6929067.sh @@ -97,6 +97,10 @@ case "$ARCH" in i686) ARCH=i386 ;; + loongarch64) + COMP_FLAG="" + ARCH=loongarch64 + ;; # Assuming other ARCH values need no translation esac diff --git a/hotspot/test/runtime/Unsafe/RangeCheck.java b/hotspot/test/runtime/Unsafe/RangeCheck.java index 9ded944cb25..4d4ea2e048a 100644 --- a/hotspot/test/runtime/Unsafe/RangeCheck.java +++ b/hotspot/test/runtime/Unsafe/RangeCheck.java @@ -43,6 +43,7 @@ public static void main(String args[]) throws Exception { true, "-Xmx32m", "-XX:-TransmitErrorReport", + "-XX:-InlineUnsafeOps", // The compiler intrinsics doesn't have the assert DummyClassWithMainRangeCheck.class.getName()); OutputAnalyzer output = new OutputAnalyzer(pb.start()); diff --git a/hotspot/test/test_env.sh b/hotspot/test/test_env.sh index 5ba4f28c455..d9d8bb6b6b6 100644 --- a/hotspot/test/test_env.sh +++ b/hotspot/test/test_env.sh @@ -211,6 +211,29 @@ if [ $? = 0 ] then VM_CPU="aarch64" fi +grep "mips" vm_version.out > ${NULL} +if [ $? = 0 ] +then + VM_CPU="mips" + if [ $VM_BITS = "64" ] + then + VM_CPU="mips64" + grep "mips64el" vm_version.out > ${NULL} + if [ $? = 0 ] + then + VM_CPU="mips64el" + fi + fi +fi +grep "loongarch" vm_version.out > ${NULL} +if [ $? = 0 ] +then + VM_CPU="loongarch" + if [ $VM_BITS = "64" ] + then + VM_CPU="loongarch64" + fi +fi export VM_TYPE VM_BITS VM_OS VM_CPU echo "VM_TYPE=${VM_TYPE}" echo "VM_BITS=${VM_BITS}" diff --git a/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java b/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java index 6a14079347f..56a6375b5f4 100644 --- a/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java +++ b/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java @@ -126,6 +126,10 @@ public static boolean isAArch64() { return isArch("aarch64"); } + public static boolean isLoongArch64() { + return isArch("loongarch64"); + } + private static boolean isArch(String archnameRE) { return Pattern.compile(archnameRE, Pattern.CASE_INSENSITIVE) .matcher(osArch) @@ -136,6 +140,10 @@ public static String getOsArch() { return osArch; } + public static boolean isMIPS() { + return isArch("mips.*"); + } + /** * Return a boolean for whether we expect to be able to attach * the SA to our own processes on this system. 
diff --git a/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java index 7d56a4a3bc1..41825e18b35 100644 --- a/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -43,7 +43,7 @@ */ public class TestMutuallyExclusivePlatformPredicates { private static enum MethodGroup { - ARCH("isARM", "isPPC", "isSparc", "isX86", "isX64", "isAArch64"), + ARCH("isARM", "isPPC", "isSparc", "isX86", "isX64", "isAArch64", "isMIPS", "isLoongArch64"), BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isSolaris", "isWindows", "isOSX"), VM_TYPE("isClient", "isServer", "isGraal", "isMinimal"), diff --git a/jdk/make/Images.gmk b/jdk/make/Images.gmk index 991c0af7b4e..91716856553 100644 --- a/jdk/make/Images.gmk +++ b/jdk/make/Images.gmk @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2022. These +# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made +# available on the same license terms set forth above. +# + include $(SPEC) include MakeBase.gmk include JavaCompilation.gmk @@ -650,6 +656,11 @@ $(JDK_IMAGE_DIR)/src.zip: $(IMAGES_OUTPUTDIR)/src.zip $(ECHO) $(LOG_INFO) Copying $(patsubst $(OUTPUT_ROOT)/%,%,$@) $(install-file) +# create link "mips64 -> mips64el" for deploy +$(JDK_IMAGE_DIR)/jre/lib/mips64: $(JDK_IMAGE_DIR)/jre/lib/mips64el + $(ECHO) $(LOG_INFO) Create link from mips64 to mips64el + $(CD) $(JDK_IMAGE_DIR)/jre/lib && $(RM) mips64 && $(LN) -s mips64el mips64 + ################################################################################ # Post processing (strip etc) @@ -728,6 +739,14 @@ ifneq ($(POST_STRIP_CMD), ) endif +################################################################################ +# Loongson added list, architecture dependent files +ifeq ($(OPENJDK_TARGET_CPU), mips64) + ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) + JDK_IMAGE_LOONGSON_LIST := $(JDK_IMAGE_DIR)/jre/lib/mips64el + endif +endif + ################################################################################ # Include the custom makefile right here, after all variables have been defined @@ -753,6 +772,7 @@ jdk-image: $(JDK_BIN_TARGETS) $(JDKJRE_BIN_TARGETS) \ $(JDKJRE_DOC_TARGETS) $(JDK_DOC_TARGETS) \ $(JDK_INFO_FILE) $(JDKJRE_STRIP_LIST) $(JDK_BIN_STRIP_LIST) \ $(JDK_IMAGE_DIR)/src.zip \ + $(JDK_IMAGE_LOONGSON_LIST) \ $(JDK_BIN_ISADIR_LINK_TARGETS) $(JDKJRE_BIN_ISADIR_LINK_TARGETS) jre-overlay-image: $(JRE_OVERLAY_BIN_TARGETS) $(JRE_OVERLAY_LIB_TARGETS) \ diff --git a/jdk/make/gensrc/GensrcMisc.gmk b/jdk/make/gensrc/GensrcMisc.gmk index 78ec501956a..0804888f3eb 100644 --- a/jdk/make/gensrc/GensrcMisc.gmk +++ b/jdk/make/gensrc/GensrcMisc.gmk @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2018. These +# modifications are Copyright (c) 2018, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + include ProfileNames.gmk ################################################################################ @@ -39,6 +45,7 @@ $(PROFILE_VERSION_JAVA_TARGETS): \ $(SED) -e 's/@@launcher_name@@/$(LAUNCHER_NAME)/g' \ -e 's/@@java_version@@/$(RELEASE)/g' \ -e 's/@@java_runtime_version@@/$(FULL_VERSION)/g' \ + -e 's/@@loongson_runtime_name@@/$(LOONGSON_RUNTIME_NAME)/g' \ -e 's/@@java_runtime_name@@/$(RUNTIME_NAME)/g' \ -e 's/@@java_profile_name@@/$(call profile_version_name, $@)/g' \ -e 's/@@java_distro_name@@/$(DISTRO_NAME)/g' \ 
diff --git a/jdk/make/lib/SoundLibraries.gmk b/jdk/make/lib/SoundLibraries.gmk index b59a9462ec5..8ce97dc8544 100644 --- a/jdk/make/lib/SoundLibraries.gmk +++ b/jdk/make/lib/SoundLibraries.gmk @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2021. These +# modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + LIBJSOUND_SRC_DIRS := \ $(JDK_TOPDIR)/src/share/native/com/sun/media/sound \ $(JDK_TOPDIR)/src/$(OPENJDK_TARGET_OS_API_DIR)/native/com/sun/media/sound @@ -136,6 +142,14 @@ else LIBJSOUND_CFLAGS += -DX_ARCH=X_PPC endif + ifeq ($(OPENJDK_TARGET_CPU), mips64) + LIBJSOUND_CFLAGS += -DX_ARCH=X_MIPS64 + endif + + ifeq ($(OPENJDK_TARGET_CPU), loongarch64) + LIBJSOUND_CFLAGS += -DX_ARCH=X_LOONGARCH64 + endif + ifeq ($(OPENJDK_TARGET_CPU), ppc64) LIBJSOUND_CFLAGS += -DX_ARCH=X_PPC64 endif diff --git a/jdk/src/share/classes/sun/misc/Version.java.template b/jdk/src/share/classes/sun/misc/Version.java.template index 9c65c022bc1..dd660bb7560 100644 --- a/jdk/src/share/classes/sun/misc/Version.java.template +++ b/jdk/src/share/classes/sun/misc/Version.java.template @@ -23,6 +23,13 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2018. These + * modifications are Copyright (c) 2018 Loongson Technology, and are made + * available on the same license terms set forth above. + */ + + package sun.misc; import java.io.PrintStream; @@ -50,6 +57,9 @@ public class Version { private static final String java_distro_version = "@@java_distro_version@@"; + private static final String loongson_runtime_name = + "@@loongson_runtime_name@@"; + static { init(); } 
diff --git a/jdk/src/solaris/bin/loongarch64/jvm.cfg b/jdk/src/solaris/bin/loongarch64/jvm.cfg new file mode 100644 index 00000000000..42a06755da8 --- /dev/null +++ b/jdk/src/solaris/bin/loongarch64/jvm.cfg @@ -0,0 +1,36 @@ +# Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. 
+# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# +# List of JVMs that can be used as an option to java, javac, etc. +# Order is important -- first in this list is the default JVM. +# NOTE that this both this file and its format are UNSUPPORTED and +# WILL GO AWAY in a future release. +# +# You may also select a JVM in an arbitrary location with the +# "-XXaltjvm=" option, but that too is unsupported +# and may not be available in a future release. +# +-server KNOWN +-client IGNORE diff --git a/jdk/src/solaris/bin/mips64/jvm.cfg b/jdk/src/solaris/bin/mips64/jvm.cfg new file mode 100644 index 00000000000..42a06755da8 --- /dev/null +++ b/jdk/src/solaris/bin/mips64/jvm.cfg @@ -0,0 +1,36 @@ +# Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# +# List of JVMs that can be used as an option to java, javac, etc. +# Order is important -- first in this list is the default JVM. +# NOTE that this both this file and its format are UNSUPPORTED and +# WILL GO AWAY in a future release. +# +# You may also select a JVM in an arbitrary location with the +# "-XXaltjvm=" option, but that too is unsupported +# and may not be available in a future release. 
+# +-server KNOWN +-client IGNORE diff --git a/jdk/test/jdk/jfr/event/os/TestCPUInformation.java b/jdk/test/jdk/jfr/event/os/TestCPUInformation.java index 17c8419cbc0..a8b76cb71a8 100644 --- a/jdk/test/jdk/jfr/event/os/TestCPUInformation.java +++ b/jdk/test/jdk/jfr/event/os/TestCPUInformation.java @@ -54,8 +54,8 @@ public static void main(String[] args) throws Throwable { Events.assertField(event, "hwThreads").atLeast(1); Events.assertField(event, "cores").atLeast(1); Events.assertField(event, "sockets").atLeast(1); - Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); - Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); + Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); + Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); } } } diff --git a/jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher b/jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher new file mode 100755 index 00000000000..e69de29bb2d diff --git a/jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher b/jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher new file mode 100644 index 00000000000..e69de29bb2d diff --git a/jdk/test/sun/security/pkcs11/PKCS11Test.java b/jdk/test/sun/security/pkcs11/PKCS11Test.java index 70c4b1e4174..e3882bf5798 100644 --- a/jdk/test/sun/security/pkcs11/PKCS11Test.java +++ b/jdk/test/sun/security/pkcs11/PKCS11Test.java @@ -21,6 +21,11 @@ * questions. */ + /* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ // common infrastructure for SunPKCS11 tests @@ -596,6 +601,9 @@ boolean checkSupport(List supportedEC, "/usr/lib64/"}); osMap.put("Linux-ppc64-64", new String[]{"/usr/lib64/"}); osMap.put("Linux-ppc64le-64", new String[]{"/usr/lib64/"}); + osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); + osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", + "/usr/lib64/" }); osMap.put("Windows-x86-32", new String[]{ PKCS11_BASE + "/nss/lib/windows-i586/".replace('/', SEP)}); osMap.put("Windows-amd64-64", new String[]{ 