From ed6b242d8b1c867a23ba904138eed8733d7a8c02 Mon Sep 17 00:00:00 2001
From: jeremiah
Date: Thu, 18 Apr 2024 20:23:24 +0800
Subject: [PATCH] FEAT(loongarch): Support loongarch with 8u402
 --story=117047250

---
 .../autoconf/build-aux/autoconf-config.guess | 3 +
 common/autoconf/build-aux/autoconf-config.sub | 1 +
 common/autoconf/build-aux/config.guess | 11 +
 common/autoconf/configure.ac | 7 +
 common/autoconf/generated-configure.sh | 84 +
 common/autoconf/platform.m4 | 71 +
 common/autoconf/spec.gmk.in | 23 +
 hotspot/agent/make/saenv.sh | 14 +
 .../agent/src/os/linux/LinuxDebuggerLocal.c | 72 +-
 hotspot/agent/src/os/linux/Makefile | 10 +-
 hotspot/agent/src/os/linux/libproc.h | 15 +-
 hotspot/agent/src/os/linux/ps_proc.c | 8 +-
 .../classes/sun/jvm/hotspot/HotSpotAgent.java | 12 +
 .../sun/jvm/hotspot/asm/Disassembler.java | 6 +
 .../MachineDescriptionLOONGARCH64.java | 41 +
 .../debugger/MachineDescriptionMIPS64.java | 41 +
 .../debugger/linux/LinuxCDebugger.java | 18 +
 .../linux/LinuxThreadContextFactory.java | 6 +
 .../loongarch64/LinuxLOONGARCH64CFrame.java | 80 +
 .../LinuxLOONGARCH64ThreadContext.java | 47 +
 .../linux/mips64/LinuxMIPS64CFrame.java | 80 +
 .../mips64/LinuxMIPS64ThreadContext.java | 47 +
 .../loongarch64/LOONGARCH64ThreadContext.java | 123 +
 .../debugger/mips64/MIPS64ThreadContext.java | 123 +
 .../hotspot/debugger/posix/elf/ELFHeader.java | 2 +
 .../debugger/proc/ProcDebuggerLocal.java | 12 +
 .../loongarch64/ProcLOONGARCH64Thread.java | 92 +
 .../ProcLOONGARCH64ThreadContext.java | 47 +
 .../ProcLOONGARCH64ThreadFactory.java | 45 +
 .../proc/mips64/ProcMIPS64Thread.java | 92 +
 .../proc/mips64/ProcMIPS64ThreadContext.java | 47 +
 .../proc/mips64/ProcMIPS64ThreadFactory.java | 45 +
 .../debugger/remote/RemoteDebuggerClient.java | 12 +
 .../loongarch64/RemoteLOONGARCH64Thread.java | 54 +
 .../RemoteLOONGARCH64ThreadContext.java | 51 +
 .../RemoteLOONGARCH64ThreadFactory.java | 45 +
 .../remote/mips64/RemoteMIPS64Thread.java | 54 +
 .../mips64/RemoteMIPS64ThreadContext.java | 51 +
 .../mips64/RemoteMIPS64ThreadFactory.java | 45 +
 .../sun/jvm/hotspot/runtime/Threads.java | 6 +
 .../LinuxLOONGARCH64JavaThreadPDAccess.java | 133 +
 .../LinuxMIPS64JavaThreadPDAccess.java | 132 +
 .../LOONGARCH64CurrentFrameGuess.java | 217 +
 .../runtime/loongarch64/LOONGARCH64Frame.java | 534 +
 .../LOONGARCH64JavaCallWrapper.java | 57 +
 .../loongarch64/LOONGARCH64RegisterMap.java | 52 +
 .../mips64/MIPS64CurrentFrameGuess.java | 217 +
 .../hotspot/runtime/mips64/MIPS64Frame.java | 547 +
 .../runtime/mips64/MIPS64JavaCallWrapper.java | 57 +
 .../runtime/mips64/MIPS64RegisterMap.java | 52 +
 .../jvm/hotspot/utilities/PlatformInfo.java | 11 +
 hotspot/make/defs.make | 37 +-
 hotspot/make/linux/Makefile | 4 +
 hotspot/make/linux/makefiles/defs.make | 56 +
 hotspot/make/linux/makefiles/gcc.make | 11 +-
 hotspot/make/linux/makefiles/loongarch64.make | 43 +
 hotspot/make/linux/makefiles/mips64.make | 43 +
 hotspot/make/linux/makefiles/sa.make | 8 +
 hotspot/make/linux/makefiles/saproc.make | 12 +
 hotspot/make/linux/makefiles/sparcWorks.make | 7 +
 hotspot/make/linux/makefiles/vm.make | 28 +
 hotspot/make/linux/platform_loongarch64 | 17 +
 hotspot/make/linux/platform_mips64 | 17 +
 hotspot/make/sa.files | 16 +
 .../aarch64/vm/c1_LIRAssembler_aarch64.cpp | 17 +-
 .../aarch64/vm/c1_LIRGenerator_aarch64.cpp | 19 +-
 .../cpu/loongarch/vm/assembler_loongarch.cpp | 855 +
 .../cpu/loongarch/vm/assembler_loongarch.hpp | 2810 ++++
 .../vm/assembler_loongarch.inline.hpp | 33 +
 .../vm/bytecodeInterpreter_loongarch.hpp | 110 +
.../bytecodeInterpreter_loongarch.inline.hpp | 286 + .../cpu/loongarch/vm/bytecodes_loongarch.cpp | 38 + .../cpu/loongarch/vm/bytecodes_loongarch.hpp | 31 + .../src/cpu/loongarch/vm/bytes_loongarch.hpp | 75 + .../vm/c1_CodeStubs_loongarch_64.cpp | 387 + .../cpu/loongarch/vm/c1_Defs_loongarch.hpp | 79 + .../loongarch/vm/c1_FpuStackSim_loongarch.hpp | 32 + .../vm/c1_FpuStackSim_loongarch_64.cpp | 31 + .../loongarch/vm/c1_FrameMap_loongarch.hpp | 143 + .../loongarch/vm/c1_FrameMap_loongarch_64.cpp | 362 + .../vm/c1_LIRAssembler_loongarch.hpp | 83 + .../vm/c1_LIRAssembler_loongarch_64.cpp | 3377 ++++ .../vm/c1_LIRGenerator_loongarch_64.cpp | 1442 ++ .../loongarch/vm/c1_LinearScan_loongarch.hpp | 70 + .../vm/c1_LinearScan_loongarch_64.cpp | 33 + .../vm/c1_MacroAssembler_loongarch.hpp | 112 + .../vm/c1_MacroAssembler_loongarch_64.cpp | 346 + .../loongarch/vm/c1_Runtime1_loongarch_64.cpp | 1252 ++ .../cpu/loongarch/vm/c1_globals_loongarch.hpp | 69 + .../cpu/loongarch/vm/c2_globals_loongarch.hpp | 87 + .../cpu/loongarch/vm/c2_init_loongarch.cpp | 34 + .../cpu/loongarch/vm/codeBuffer_loongarch.hpp | 35 + .../cpu/loongarch/vm/compiledIC_loongarch.cpp | 167 + .../src/cpu/loongarch/vm/copy_loongarch.hpp | 90 + .../vm/cppInterpreterGenerator_loongarch.hpp | 53 + .../loongarch/vm/cppInterpreter_loongarch.cpp | 215 + .../src/cpu/loongarch/vm/debug_loongarch.cpp | 51 + .../cpu/loongarch/vm/depChecker_loongarch.cpp | 30 + .../cpu/loongarch/vm/depChecker_loongarch.hpp | 31 + .../loongarch/vm/disassembler_loongarch.hpp | 37 + .../src/cpu/loongarch/vm/frame_loongarch.cpp | 711 + .../src/cpu/loongarch/vm/frame_loongarch.hpp | 229 + .../loongarch/vm/frame_loongarch.inline.hpp | 312 + .../vm/globalDefinitions_loongarch.hpp | 41 + .../cpu/loongarch/vm/globals_loongarch.hpp | 103 + .../cpu/loongarch/vm/icBuffer_loongarch.cpp | 101 + .../src/cpu/loongarch/vm/icache_loongarch.cpp | 42 + .../src/cpu/loongarch/vm/icache_loongarch.hpp | 41 + .../loongarch/vm/interp_masm_loongarch_64.cpp | 1960 +++ .../loongarch/vm/interp_masm_loongarch_64.hpp | 269 + .../vm/interpreterGenerator_loongarch.hpp | 51 + .../loongarch/vm/interpreterRT_loongarch.hpp | 66 + .../vm/interpreterRT_loongarch_64.cpp | 274 + .../loongarch/vm/interpreter_loongarch.hpp | 50 + .../loongarch/vm/interpreter_loongarch_64.cpp | 277 + .../vm/javaFrameAnchor_loongarch.hpp | 87 + .../vm/jniFastGetField_loongarch_64.cpp | 169 + .../cpu/loongarch/vm/jniTypes_loongarch.hpp | 144 + hotspot/src/cpu/loongarch/vm/jni_loongarch.h | 51 + hotspot/src/cpu/loongarch/vm/loongarch.ad | 24 + hotspot/src/cpu/loongarch/vm/loongarch_64.ad | 12861 ++++++++++++++ .../loongarch/vm/macroAssembler_loongarch.cpp | 3895 +++++ .../loongarch/vm/macroAssembler_loongarch.hpp | 771 + .../vm/macroAssembler_loongarch.inline.hpp | 34 + .../vm/metaspaceShared_loongarch_64.cpp | 120 + .../loongarch/vm/methodHandles_loongarch.cpp | 566 + .../loongarch/vm/methodHandles_loongarch.hpp | 62 + .../cpu/loongarch/vm/nativeInst_loongarch.cpp | 485 + .../cpu/loongarch/vm/nativeInst_loongarch.hpp | 513 + .../loongarch/vm/registerMap_loongarch.hpp | 45 + .../vm/register_definitions_loongarch.cpp | 103 + .../cpu/loongarch/vm/register_loongarch.cpp | 59 + .../cpu/loongarch/vm/register_loongarch.hpp | 436 + .../cpu/loongarch/vm/relocInfo_loongarch.cpp | 130 + .../cpu/loongarch/vm/relocInfo_loongarch.hpp | 40 + .../cpu/loongarch/vm/runtime_loongarch_64.cpp | 199 + .../vm/sharedRuntime_loongarch_64.cpp | 3453 ++++ .../vm/stubGenerator_loongarch_64.cpp | 3445 ++++ .../vm/stubRoutines_loongarch_64.cpp | 264 + 
.../vm/stubRoutines_loongarch_64.hpp | 60 + ...templateInterpreterGenerator_loongarch.hpp | 35 + .../vm/templateInterpreter_loongarch.hpp | 41 + .../vm/templateInterpreter_loongarch_64.cpp | 2335 +++ .../vm/templateTable_loongarch_64.cpp | 4024 +++++ .../vm/templateTable_loongarch_64.hpp | 44 + .../cpu/loongarch/vm/vmStructs_loongarch.hpp | 68 + .../loongarch/vm/vm_version_ext_loongarch.cpp | 84 + .../loongarch/vm/vm_version_ext_loongarch.hpp | 54 + .../cpu/loongarch/vm/vm_version_loongarch.cpp | 443 + .../cpu/loongarch/vm/vm_version_loongarch.hpp | 299 + .../src/cpu/loongarch/vm/vmreg_loongarch.cpp | 51 + .../src/cpu/loongarch/vm/vmreg_loongarch.hpp | 35 + .../loongarch/vm/vmreg_loongarch.inline.hpp | 66 + .../loongarch/vm/vtableStubs_loongarch_64.cpp | 300 + hotspot/src/cpu/mips/vm/assembler_mips.cpp | 774 + hotspot/src/cpu/mips/vm/assembler_mips.hpp | 1789 ++ .../src/cpu/mips/vm/assembler_mips.inline.hpp | 33 + .../cpu/mips/vm/bytecodeInterpreter_mips.cpp | 53 + .../cpu/mips/vm/bytecodeInterpreter_mips.hpp | 110 + .../vm/bytecodeInterpreter_mips.inline.hpp | 286 + hotspot/src/cpu/mips/vm/bytecodes_mips.cpp | 38 + hotspot/src/cpu/mips/vm/bytecodes_mips.hpp | 31 + hotspot/src/cpu/mips/vm/bytes_mips.hpp | 193 + hotspot/src/cpu/mips/vm/c2_globals_mips.hpp | 100 + hotspot/src/cpu/mips/vm/c2_init_mips.cpp | 34 + hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp | 35 + hotspot/src/cpu/mips/vm/compiledIC_mips.cpp | 173 + hotspot/src/cpu/mips/vm/copy_mips.hpp | 90 + .../mips/vm/cppInterpreterGenerator_mips.hpp | 53 + .../src/cpu/mips/vm/cppInterpreter_mips.cpp | 215 + .../src/cpu/mips/vm/cppInterpreter_mips.hpp | 40 + hotspot/src/cpu/mips/vm/debug_mips.cpp | 51 + hotspot/src/cpu/mips/vm/depChecker_mips.cpp | 30 + hotspot/src/cpu/mips/vm/depChecker_mips.hpp | 31 + hotspot/src/cpu/mips/vm/disassembler_mips.hpp | 37 + hotspot/src/cpu/mips/vm/frame_mips.cpp | 711 + hotspot/src/cpu/mips/vm/frame_mips.hpp | 229 + hotspot/src/cpu/mips/vm/frame_mips.inline.hpp | 312 + .../cpu/mips/vm/globalDefinitions_mips.hpp | 41 + hotspot/src/cpu/mips/vm/globals_mips.hpp | 124 + hotspot/src/cpu/mips/vm/icBuffer_mips.cpp | 97 + hotspot/src/cpu/mips/vm/icache_mips.cpp | 41 + hotspot/src/cpu/mips/vm/icache_mips.hpp | 41 + .../src/cpu/mips/vm/interp_masm_mips_64.cpp | 2084 +++ .../src/cpu/mips/vm/interp_masm_mips_64.hpp | 269 + .../cpu/mips/vm/interpreterGenerator_mips.hpp | 49 + .../src/cpu/mips/vm/interpreterRT_mips.hpp | 61 + .../src/cpu/mips/vm/interpreterRT_mips_64.cpp | 259 + hotspot/src/cpu/mips/vm/interpreter_mips.hpp | 50 + .../src/cpu/mips/vm/interpreter_mips_64.cpp | 286 + .../src/cpu/mips/vm/javaFrameAnchor_mips.hpp | 87 + .../cpu/mips/vm/jniFastGetField_mips_64.cpp | 172 + hotspot/src/cpu/mips/vm/jniTypes_mips.hpp | 144 + hotspot/src/cpu/mips/vm/jni_mips.h | 51 + .../src/cpu/mips/vm/macroAssembler_mips.cpp | 4332 +++++ .../src/cpu/mips/vm/macroAssembler_mips.hpp | 701 + .../mips/vm/macroAssembler_mips.inline.hpp | 34 + .../cpu/mips/vm/metaspaceShared_mips_64.cpp | 123 + .../src/cpu/mips/vm/methodHandles_mips.cpp | 576 + .../src/cpu/mips/vm/methodHandles_mips.hpp | 62 + hotspot/src/cpu/mips/vm/mips.ad | 25 + hotspot/src/cpu/mips/vm/mips_64.ad | 14036 ++++++++++++++++ hotspot/src/cpu/mips/vm/nativeInst_mips.cpp | 1829 ++ hotspot/src/cpu/mips/vm/nativeInst_mips.hpp | 735 + hotspot/src/cpu/mips/vm/registerMap_mips.hpp | 47 + .../cpu/mips/vm/register_definitions_mips.cpp | 103 + hotspot/src/cpu/mips/vm/register_mips.cpp | 52 + hotspot/src/cpu/mips/vm/register_mips.hpp | 346 + hotspot/src/cpu/mips/vm/relocInfo_mips.cpp 
| 156 + hotspot/src/cpu/mips/vm/relocInfo_mips.hpp | 40 + hotspot/src/cpu/mips/vm/runtime_mips_64.cpp | 206 + .../src/cpu/mips/vm/sharedRuntime_mips_64.cpp | 3816 +++++ .../src/cpu/mips/vm/stubGenerator_mips_64.cpp | 2147 +++ .../src/cpu/mips/vm/stubRoutines_mips_64.cpp | 35 + .../src/cpu/mips/vm/stubRoutines_mips_64.hpp | 59 + .../vm/templateInterpreterGenerator_mips.hpp | 35 + .../cpu/mips/vm/templateInterpreter_mips.hpp | 41 + .../mips/vm/templateInterpreter_mips_64.cpp | 2306 +++ .../src/cpu/mips/vm/templateTable_mips.hpp | 34 + .../src/cpu/mips/vm/templateTable_mips_64.cpp | 4623 +++++ .../src/cpu/mips/vm/templateTable_mips_64.hpp | 44 + hotspot/src/cpu/mips/vm/vmStructs_mips.hpp | 68 + .../src/cpu/mips/vm/vm_version_ext_mips.cpp | 89 + .../src/cpu/mips/vm/vm_version_ext_mips.hpp | 54 + hotspot/src/cpu/mips/vm/vm_version_mips.cpp | 510 + hotspot/src/cpu/mips/vm/vm_version_mips.hpp | 221 + hotspot/src/cpu/mips/vm/vmreg_mips.cpp | 51 + hotspot/src/cpu/mips/vm/vmreg_mips.hpp | 35 + hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp | 68 + .../src/cpu/mips/vm/vtableStubs_mips_64.cpp | 301 + .../src/cpu/x86/vm/c1_LIRAssembler_x86.cpp | 10 + .../src/cpu/x86/vm/c1_LIRGenerator_x86.cpp | 21 +- hotspot/src/os/linux/vm/os_linux.cpp | 32 +- hotspot/src/os/linux/vm/os_perf_linux.cpp | 6 + .../vm/assembler_linux_loongarch.cpp | 92 + .../vm/atomic_linux_loongarch.inline.hpp | 206 + .../vm/bytes_linux_loongarch.inline.hpp | 37 + .../vm/copy_linux_loongarch.inline.hpp | 125 + .../vm/globals_linux_loongarch.hpp | 43 + .../vm/orderAccess_linux_loongarch.inline.hpp | 115 + .../linux_loongarch/vm/os_linux_loongarch.cpp | 750 + .../linux_loongarch/vm/os_linux_loongarch.hpp | 39 + .../vm/prefetch_linux_loongarch.inline.hpp | 56 + .../vm/threadLS_linux_loongarch.cpp | 84 + .../vm/threadLS_linux_loongarch.hpp | 61 + .../vm/thread_linux_loongarch.cpp | 99 + .../vm/thread_linux_loongarch.hpp | 75 + .../vm/vmStructs_linux_loongarch.hpp | 55 + .../vm/vm_version_linux_loongarch.cpp | 29 + .../linux_mips/vm/assembler_linux_mips.cpp | 111 + .../vm/atomic_linux_mips.inline.hpp | 258 + .../linux_mips/vm/bytes_linux_mips.inline.hpp | 37 + .../linux_mips/vm/copy_linux_mips.inline.hpp | 125 + .../linux_mips/vm/globals_linux_mips.hpp | 51 + .../src/os_cpu/linux_mips/vm/linux_mips.ad | 153 + hotspot/src/os_cpu/linux_mips/vm/linux_mips.s | 25 + .../src/os_cpu/linux_mips/vm/linux_mips_64.ad | 50 + .../vm/orderAccess_linux_mips.inline.hpp | 115 + .../os_cpu/linux_mips/vm/os_linux_mips.cpp | 1015 ++ .../os_cpu/linux_mips/vm/os_linux_mips.hpp | 39 + .../vm/prefetch_linux_mips.inline.hpp | 58 + .../linux_mips/vm/threadLS_linux_mips.cpp | 84 + .../linux_mips/vm/threadLS_linux_mips.hpp | 61 + .../linux_mips/vm/thread_linux_mips.cpp | 99 + .../linux_mips/vm/thread_linux_mips.hpp | 75 + .../linux_mips/vm/vmStructs_linux_mips.hpp | 55 + .../linux_mips/vm/vm_version_linux_mips.cpp | 28 + hotspot/src/share/tools/hsdis/Makefile | 13 + hotspot/src/share/tools/hsdis/hsdis.c | 10 + hotspot/src/share/vm/adlc/main.cpp | 14 + hotspot/src/share/vm/asm/assembler.hpp | 20 + hotspot/src/share/vm/asm/assembler.inline.hpp | 12 + hotspot/src/share/vm/asm/codeBuffer.cpp | 7 + hotspot/src/share/vm/asm/codeBuffer.hpp | 12 + hotspot/src/share/vm/asm/macroAssembler.hpp | 13 +- .../share/vm/asm/macroAssembler.inline.hpp | 12 + hotspot/src/share/vm/asm/register.hpp | 12 + hotspot/src/share/vm/c1/c1_Defs.hpp | 12 + hotspot/src/share/vm/c1/c1_FpuStackSim.hpp | 9 + hotspot/src/share/vm/c1/c1_FrameMap.cpp | 9 + hotspot/src/share/vm/c1/c1_FrameMap.hpp | 9 + 
hotspot/src/share/vm/c1/c1_LIR.cpp | 220 +- hotspot/src/share/vm/c1/c1_LIR.hpp | 199 +- hotspot/src/share/vm/c1/c1_LIRAssembler.cpp | 22 + hotspot/src/share/vm/c1/c1_LIRAssembler.hpp | 12 + hotspot/src/share/vm/c1/c1_LIRGenerator.cpp | 114 +- hotspot/src/share/vm/c1/c1_LIRGenerator.hpp | 17 +- hotspot/src/share/vm/c1/c1_LinearScan.cpp | 81 +- hotspot/src/share/vm/c1/c1_LinearScan.hpp | 9 + hotspot/src/share/vm/c1/c1_MacroAssembler.hpp | 9 + hotspot/src/share/vm/c1/c1_Runtime1.cpp | 48 + hotspot/src/share/vm/c1/c1_globals.hpp | 9 + .../share/vm/classfile/bytecodeAssembler.cpp | 12 + .../share/vm/classfile/classFileStream.hpp | 12 + .../src/share/vm/classfile/stackMapTable.hpp | 12 + hotspot/src/share/vm/classfile/verifier.cpp | 12 + hotspot/src/share/vm/code/codeBlob.cpp | 12 + hotspot/src/share/vm/code/compiledIC.hpp | 12 + hotspot/src/share/vm/code/relocInfo.hpp | 45 +- hotspot/src/share/vm/code/vmreg.hpp | 18 + .../src/share/vm/compiler/disassembler.cpp | 12 + .../src/share/vm/compiler/disassembler.hpp | 12 + .../parallelScavenge/cardTableExtension.hpp | 3 + .../parallelScavenge/parMarkBitMap.cpp | 3 + .../psCompactionManager.inline.hpp | 6 + .../parallelScavenge/psParallelCompact.cpp | 9 + .../parallelScavenge/psParallelCompact.hpp | 6 + .../psPromotionManager.inline.hpp | 24 +- .../parallelScavenge/psScavenge.inline.hpp | 21 +- .../vm/interpreter/abstractInterpreter.hpp | 10 + hotspot/src/share/vm/interpreter/bytecode.hpp | 12 + .../vm/interpreter/bytecodeInterpreter.hpp | 15 + .../bytecodeInterpreter.inline.hpp | 12 + .../share/vm/interpreter/bytecodeStream.hpp | 12 + .../src/share/vm/interpreter/bytecodes.cpp | 12 + .../src/share/vm/interpreter/bytecodes.hpp | 12 + .../share/vm/interpreter/cppInterpreter.hpp | 12 + .../interpreter/cppInterpreterGenerator.hpp | 12 + .../src/share/vm/interpreter/interpreter.hpp | 12 + .../vm/interpreter/interpreterGenerator.hpp | 12 + .../vm/interpreter/interpreterRuntime.cpp | 14 +- .../vm/interpreter/interpreterRuntime.hpp | 14 +- .../vm/interpreter/templateInterpreter.hpp | 12 + .../templateInterpreterGenerator.hpp | 12 + .../share/vm/interpreter/templateTable.hpp | 14 + .../share/vm/jfr/utilities/jfrBigEndian.hpp | 2 +- .../src/share/vm/jfr/writers/jfrEncoders.hpp | 12 + hotspot/src/share/vm/memory/barrierSet.hpp | 25 +- .../src/share/vm/memory/cardTableModRefBS.hpp | 12 +- hotspot/src/share/vm/memory/cardTableRS.cpp | 10 +- hotspot/src/share/vm/memory/cardTableRS.hpp | 9 +- hotspot/src/share/vm/memory/metaspace.cpp | 13 +- hotspot/src/share/vm/oops/constantPool.hpp | 13 + hotspot/src/share/vm/oops/klass.hpp | 17 +- hotspot/src/share/vm/oops/oop.hpp | 8 +- hotspot/src/share/vm/oops/oop.inline.hpp | 12 + hotspot/src/share/vm/oops/oop.pcgc.inline.hpp | 8 +- hotspot/src/share/vm/opto/buildOopMap.cpp | 12 + hotspot/src/share/vm/opto/bytecodeInfo.cpp | 11 + hotspot/src/share/vm/opto/c2_globals.hpp | 12 + hotspot/src/share/vm/opto/c2compiler.cpp | 10 + hotspot/src/share/vm/opto/chaitin.hpp | 14 + hotspot/src/share/vm/opto/compile.cpp | 10 + hotspot/src/share/vm/opto/compile.hpp | 2 +- hotspot/src/share/vm/opto/gcm.cpp | 10 + hotspot/src/share/vm/opto/lcm.cpp | 10 + hotspot/src/share/vm/opto/locknode.hpp | 10 + hotspot/src/share/vm/opto/matcher.cpp | 10 + hotspot/src/share/vm/opto/output.cpp | 43 + hotspot/src/share/vm/opto/output.hpp | 10 + hotspot/src/share/vm/opto/regmask.cpp | 10 + hotspot/src/share/vm/opto/regmask.hpp | 10 + hotspot/src/share/vm/opto/runtime.cpp | 10 + hotspot/src/share/vm/opto/type.cpp | 16 + hotspot/src/share/vm/prims/jniCheck.cpp 
| 12 + hotspot/src/share/vm/prims/jni_md.h | 12 + .../vm/prims/jvmtiClassFileReconstituter.cpp | 12 + hotspot/src/share/vm/prims/methodHandles.hpp | 13 + .../src/share/vm/runtime/atomic.inline.hpp | 12 + .../src/share/vm/runtime/deoptimization.cpp | 18 + hotspot/src/share/vm/runtime/dtraceJSDT.hpp | 12 + hotspot/src/share/vm/runtime/frame.cpp | 13 + hotspot/src/share/vm/runtime/frame.hpp | 16 + hotspot/src/share/vm/runtime/frame.inline.hpp | 18 + hotspot/src/share/vm/runtime/globals.hpp | 26 +- hotspot/src/share/vm/runtime/icache.hpp | 13 +- hotspot/src/share/vm/runtime/java.cpp | 12 + hotspot/src/share/vm/runtime/javaCalls.hpp | 12 + .../src/share/vm/runtime/javaFrameAnchor.hpp | 12 + hotspot/src/share/vm/runtime/os.cpp | 3 +- hotspot/src/share/vm/runtime/os.hpp | 12 + .../src/share/vm/runtime/prefetch.inline.hpp | 6 + hotspot/src/share/vm/runtime/registerMap.hpp | 18 + hotspot/src/share/vm/runtime/relocator.hpp | 12 + hotspot/src/share/vm/runtime/safepoint.cpp | 14 + .../src/share/vm/runtime/sharedRuntime.cpp | 16 +- .../share/vm/runtime/sharedRuntimeTrig.cpp | 15 + .../share/vm/runtime/stackValueCollection.cpp | 12 + hotspot/src/share/vm/runtime/statSampler.cpp | 12 + hotspot/src/share/vm/runtime/stubRoutines.hpp | 16 + hotspot/src/share/vm/runtime/thread.cpp | 6 + hotspot/src/share/vm/runtime/thread.hpp | 12 + .../share/vm/runtime/threadLocalStorage.hpp | 12 + hotspot/src/share/vm/runtime/virtualspace.cpp | 16 + hotspot/src/share/vm/runtime/vmStructs.cpp | 22 + hotspot/src/share/vm/runtime/vm_version.cpp | 20 + hotspot/src/share/vm/utilities/copy.hpp | 13 + hotspot/src/share/vm/utilities/debug.cpp | 1 + .../share/vm/utilities/globalDefinitions.hpp | 12 + hotspot/src/share/vm/utilities/macros.hpp | 30 + hotspot/src/share/vm/utilities/taskqueue.hpp | 74 +- hotspot/src/share/vm/utilities/vmError.cpp | 14 +- .../argumentcorruption/Test8167409.sh | 18 + .../testcases/GenericTestCaseForOtherCPU.java | 5 +- .../sha/predicate/IntrinsicPredicates.java | 10 +- hotspot/test/runtime/6929067/Test6929067.sh | 4 + hotspot/test/runtime/Unsafe/RangeCheck.java | 1 + hotspot/test/test_env.sh | 23 + .../com/oracle/java/testlibrary/Platform.java | 8 + ...stMutuallyExclusivePlatformPredicates.java | 2 +- jdk/make/Images.gmk | 20 + jdk/make/gensrc/GensrcMisc.gmk | 7 + jdk/make/gensrc/GensrcMisc.gmk.orig | 172 + jdk/make/lib/SoundLibraries.gmk | 14 + .../classes/sun/misc/Version.java.template | 10 + .../sun/misc/Version.java.template.orig | 367 + jdk/src/solaris/bin/loongarch64/jvm.cfg | 36 + jdk/src/solaris/bin/mips64/jvm.cfg | 36 + .../jdk/jfr/event/os/TestCPUInformation.java | 4 +- .../bootstrap/linux-loongarch64/launcher | 0 .../bootstrap/linux-mips64el/launcher | 0 jdk/test/sun/security/pkcs11/PKCS11Test.java | 8 + .../sun/security/pkcs11/PKCS11Test.java.orig | 704 + 413 files changed, 112423 insertions(+), 205 deletions(-) create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java create mode 100644 
hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java create mode 100644 hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java create mode 100644 hotspot/make/linux/makefiles/loongarch64.make create mode 100644 hotspot/make/linux/makefiles/mips64.make create mode 100644 hotspot/make/linux/platform_loongarch64 create mode 100644 hotspot/make/linux/platform_mips64 create mode 100644 hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp create mode 100644 
hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp create mode 100644 
hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/jni_loongarch.h create mode 100644 hotspot/src/cpu/loongarch/vm/loongarch.ad create mode 100644 hotspot/src/cpu/loongarch/vm/loongarch_64.ad create mode 100644 hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/register_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/register_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp create mode 100644 hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp create mode 100644 hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/assembler_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/assembler_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp create mode 100644 hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp create mode 100644 hotspot/src/cpu/mips/vm/bytecodes_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/bytecodes_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/bytes_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/c2_globals_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/c2_init_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp create 
mode 100644 hotspot/src/cpu/mips/vm/compiledIC_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/copy_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/debug_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/depChecker_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/depChecker_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/disassembler_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/frame_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/frame_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/frame_mips.inline.hpp create mode 100644 hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/globals_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/icBuffer_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/icache_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/icache_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp create mode 100644 hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/interpreter_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/jniTypes_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/jni_mips.h create mode 100644 hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp create mode 100644 hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/methodHandles_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/methodHandles_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/mips.ad create mode 100644 hotspot/src/cpu/mips/vm/mips_64.ad create mode 100644 hotspot/src/cpu/mips/vm/nativeInst_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/nativeInst_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/registerMap_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/register_definitions_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/register_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/register_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/relocInfo_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/relocInfo_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/runtime_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp create mode 100644 hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp create mode 100644 hotspot/src/cpu/mips/vm/templateTable_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp create mode 100644 
hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp create mode 100644 hotspot/src/cpu/mips/vm/vmStructs_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/vm_version_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/vm_version_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/vmreg_mips.cpp create mode 100644 hotspot/src/cpu/mips/vm/vmreg_mips.hpp create mode 100644 hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp create mode 100644 hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp create mode 100644 hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad create mode 100644 hotspot/src/os_cpu/linux_mips/vm/linux_mips.s create mode 100644 hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad create mode 100644 hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp create mode 100644 hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp create mode 100644 jdk/make/gensrc/GensrcMisc.gmk.orig create mode 100644 jdk/src/share/classes/sun/misc/Version.java.template.orig create mode 100644 jdk/src/solaris/bin/loongarch64/jvm.cfg create mode 100644 jdk/src/solaris/bin/mips64/jvm.cfg create mode 100755 
 jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher
 create mode 100644 jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher
 create mode 100644 jdk/test/sun/security/pkcs11/PKCS11Test.java.orig

diff --git a/common/autoconf/build-aux/autoconf-config.guess b/common/autoconf/build-aux/autoconf-config.guess
index 15ee4389269..3d7555b52d3 100644
--- a/common/autoconf/build-aux/autoconf-config.guess
+++ b/common/autoconf/build-aux/autoconf-config.guess
@@ -977,6 +977,9 @@ EOF
         eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
         test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
         ;;
+    loongarch64:Linux:*:*)
+        echo ${UNAME_MACHINE}-unknown-linux-gnu
+        exit ;;
     or32:Linux:*:*)
         echo ${UNAME_MACHINE}-unknown-linux-gnu
         exit ;;
diff --git a/common/autoconf/build-aux/autoconf-config.sub b/common/autoconf/build-aux/autoconf-config.sub
index 1aab2b303e3..bd910bddbe1 100644
--- a/common/autoconf/build-aux/autoconf-config.sub
+++ b/common/autoconf/build-aux/autoconf-config.sub
@@ -275,6 +275,7 @@ case $basic_machine in
 	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
 	| i370 | i860 | i960 | ia64 \
 	| ip2k | iq2000 \
+	| loongarch | loongarch64 \
 	| m32c | m32r | m32rle | m68000 | m68k | m88k \
 	| maxq | mb | microblaze | mcore | mep \
 	| mips | mipsbe | mipseb | mipsel | mipsle \
diff --git a/common/autoconf/build-aux/config.guess b/common/autoconf/build-aux/config.guess
index 355c91e4ebb..d03d029ce39 100644
--- a/common/autoconf/build-aux/config.guess
+++ b/common/autoconf/build-aux/config.guess
@@ -86,4 +86,15 @@ if [ "x$OUT" = x ]; then
   fi
 fi
 
+# Test and fix little endian MIPS.
+if [ "x$OUT" = x ]; then
+  if [ `uname -s` = Linux ]; then
+    if [ `uname -m` = mipsel ]; then
+      OUT=mipsel-unknown-linux-gnu
+    elif [ `uname -m` = mips64el ]; then
+      OUT=mips64el-unknown-linux-gnu
+    fi
+  fi
+fi
+
 echo $OUT
diff --git a/common/autoconf/configure.ac b/common/autoconf/configure.ac
index 151e5a109f8..5072409dd4c 100644
--- a/common/autoconf/configure.ac
+++ b/common/autoconf/configure.ac
@@ -23,6 +23,12 @@
 # questions.
 #
 
+#
+# This file has been modified by Loongson Technology in 2018. These
+# modifications are Copyright (c) 2018 Loongson Technology, and are made
+# available on the same license terms set forth above.
+#
+
 ###############################################################################
 #
 # Includes and boilerplate
@@ -186,6 +192,7 @@ FLAGS_SETUP_INIT_FLAGS
 # Now we can test some aspects on the target using configure macros.
 PLATFORM_SETUP_OPENJDK_TARGET_BITS
 PLATFORM_SETUP_OPENJDK_TARGET_ENDIANNESS
+GET_BUILDER_AND_HOST_DATA
 
 # Configure flags for the tools
 FLAGS_SETUP_COMPILER_FLAGS_FOR_LIBS
diff --git a/common/autoconf/generated-configure.sh b/common/autoconf/generated-configure.sh
index a89cb30f373..19b18ece406 100644
--- a/common/autoconf/generated-configure.sh
+++ b/common/autoconf/generated-configure.sh
@@ -716,6 +716,9 @@ SET_EXECUTABLE_ORIGIN
 SHARED_LIBRARY_FLAGS
 CXX_FLAG_REORDER
 C_FLAG_REORDER
+HOST_NAME
+BUILDER_NAME
+BUILDER_ID
 SYSROOT_LDFLAGS
 SYSROOT_CFLAGS
 RC_FLAGS
@@ -4069,6 +4072,12 @@ fi
 # questions.
 #
 
+#
+# This file has been modified by Loongson Technology in 2022. These
+# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made
+# available on the same license terms set forth above.
+#
+
 # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD.
 # Converts autoconf style CPU name to OpenJDK style, into
 # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN.
@@ -13918,6 +13927,18 @@ test -n "$target_alias" &&
       VAR_CPU_BITS=64
       VAR_CPU_ENDIAN=big
       ;;
+    mips64el)
+      VAR_CPU=mips64
+      VAR_CPU_ARCH=mips
+      VAR_CPU_BITS=64
+      VAR_CPU_ENDIAN=little
+      ;;
+    loongarch64)
+      VAR_CPU=loongarch64
+      VAR_CPU_ARCH=loongarch
+      VAR_CPU_BITS=64
+      VAR_CPU_ENDIAN=little
+      ;;
     *)
       as_fn_error $? "unsupported cpu $build_cpu" "$LINENO" 5
       ;;
@@ -14056,6 +14077,18 @@ $as_echo "$OPENJDK_BUILD_OS-$OPENJDK_BUILD_CPU" >&6; }
       VAR_CPU_BITS=64
       VAR_CPU_ENDIAN=big
       ;;
+    mips64el)
+      VAR_CPU=mips64
+      VAR_CPU_ARCH=mips
+      VAR_CPU_BITS=64
+      VAR_CPU_ENDIAN=little
+      ;;
+    loongarch64)
+      VAR_CPU=loongarch64
+      VAR_CPU_ARCH=loongarch
+      VAR_CPU_BITS=64
+      VAR_CPU_ENDIAN=little
+      ;;
     *)
       as_fn_error $? "unsupported cpu $host_cpu" "$LINENO" 5
       ;;
@@ -14178,6 +14211,8 @@ $as_echo "$COMPILE_TYPE" >&6; }
     OPENJDK_TARGET_CPU_LEGACY_LIB="i386"
   elif test "x$OPENJDK_TARGET_CPU" = xx86_64; then
     OPENJDK_TARGET_CPU_LEGACY_LIB="amd64"
+  elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then
+    OPENJDK_TARGET_CPU_LEGACY_LIB="mips64el"
   fi
 
 
@@ -14211,6 +14246,9 @@ $as_echo "$COMPILE_TYPE" >&6; }
   elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then
     # On all platforms except macosx, we replace x86_64 with amd64.
     OPENJDK_TARGET_CPU_OSARCH="amd64"
+  elif test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then
+    # System.getProperty("os.arch"): mips64 -> mips64el
+    OPENJDK_TARGET_CPU_OSARCH="mips64el"
   fi
 
 
@@ -14220,6 +14258,8 @@ $as_echo "$COMPILE_TYPE" >&6; }
   elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then
     # On all platforms except macosx, we replace x86_64 with amd64.
     OPENJDK_TARGET_CPU_JLI="amd64"
+  elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then
+    OPENJDK_TARGET_CPU_JLI="mips64el"
   fi
 # Now setup the -D flags for building libjli.
 OPENJDK_TARGET_CPU_JLI_CFLAGS="-DLIBARCHNAME='\"$OPENJDK_TARGET_CPU_JLI\"'"
@@ -14232,6 +14272,9 @@ $as_echo "$COMPILE_TYPE" >&6; }
   elif test "x$OPENJDK_TARGET_OS" = xmacosx && test "x$TOOLCHAIN_TYPE" = xclang ; then
     OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -stdlib=libc++ -mmacosx-version-min=\$(MACOSX_VERSION_MIN)"
   fi
+  if test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then
+    OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -DLIBARCH32NAME='\"mips32el\"' -DLIBARCH64NAME='\"mips64el\"'"
+  fi
 
 
 # Setup OPENJDK_TARGET_OS_API_DIR, used in source paths.
@@ -42412,6 +42455,47 @@ $as_echo "$ac_cv_c_bigendian" >&6; } fi +BUILDER_NAME="$build_os" +BUILDER_ID="Custom build ($(date))" +if test -f /etc/issue; then + etc_issue_info=`cat /etc/issue` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/issue | head -n 1 | cut -d " " -f 1` + fi +fi +if test -f /etc/redhat-release; then + etc_issue_info=`cat /etc/redhat-release` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/redhat-release | head -n 1 | cut -d " " -f 1` + fi +fi +if test -f /etc/neokylin-release; then + etc_issue_info=`cat /etc/neokylin-release` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/neokylin-release | head -n 1 | cut -d " " -f 1` + fi +fi +if test -z "$BUILDER_NAME"; then + BUILDER_NAME="unknown" +fi +BUILDER_NAME=`echo $BUILDER_NAME | sed -r "s/-//g"` +if test -n "$OPENJDK_TARGET_CPU_OSARCH"; then + HOST_NAME="$OPENJDK_TARGET_CPU_OSARCH" +else + HOST_NAME="unknown" +fi +if test -f "/usr/bin/cpp"; then + # gcc_with_arch_info=`gcc -v 2>&1 | grep '\-\-with-arch=' | sed 's/.*--with-arch=//;s/ .*$//'` + gcc_with_arch_info=`cpp -dM /dev/null | grep '\<_MIPS_ARCH\>' | sed 's/^#define _MIPS_ARCH "//;s/"$//'` + if test -n "$gcc_with_arch_info"; then + HOST_NAME="$gcc_with_arch_info" + fi +fi + + + + + # Configure flags for the tools ############################################################################### diff --git a/common/autoconf/platform.m4 b/common/autoconf/platform.m4 index 51df988f619..51cc28c312f 100644 --- a/common/autoconf/platform.m4 +++ b/common/autoconf/platform.m4 @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2022. These +# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. # Converts autoconf style CPU name to OpenJDK style, into # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. @@ -96,6 +102,18 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], VAR_CPU_BITS=64 VAR_CPU_ENDIAN=big ;; + mips64el) + VAR_CPU=mips64 + VAR_CPU_ARCH=mips + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=little + ;; + loongarch64) + VAR_CPU=loongarch64 + VAR_CPU_ARCH=loongarch + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=little + ;; *) AC_MSG_ERROR([unsupported cpu $1]) ;; @@ -283,6 +301,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], OPENJDK_TARGET_CPU_LEGACY_LIB="i386" elif test "x$OPENJDK_TARGET_CPU" = xx86_64; then OPENJDK_TARGET_CPU_LEGACY_LIB="amd64" + elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then + OPENJDK_TARGET_CPU_LEGACY_LIB="mips64el" fi AC_SUBST(OPENJDK_TARGET_CPU_LEGACY_LIB) @@ -316,6 +336,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then # On all platforms except macosx, we replace x86_64 with amd64. OPENJDK_TARGET_CPU_OSARCH="amd64" + elif test "x$OPENJDK_TARGET_OS" = xlinux && test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then + # System.getProperty("os.arch"): mips64 -> mips64el + OPENJDK_TARGET_CPU_OSARCH="mips64el" fi AC_SUBST(OPENJDK_TARGET_CPU_OSARCH) @@ -325,6 +348,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], elif test "x$OPENJDK_TARGET_OS" != xmacosx && test "x$OPENJDK_TARGET_CPU" = xx86_64; then # On all platforms except macosx, we replace x86_64 with amd64. 
OPENJDK_TARGET_CPU_JLI="amd64" + elif test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then + OPENJDK_TARGET_CPU_JLI="mips64el" fi # Now setup the -D flags for building libjli. OPENJDK_TARGET_CPU_JLI_CFLAGS="-DLIBARCHNAME='\"$OPENJDK_TARGET_CPU_JLI\"'" @@ -337,6 +362,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS], elif test "x$OPENJDK_TARGET_OS" = xmacosx && test "x$TOOLCHAIN_TYPE" = xclang ; then OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -stdlib=libc++ -mmacosx-version-min=\$(MACOSX_VERSION_MIN)" fi + if test "x$OPENJDK_TARGET_CPU" = xmips64 && test "x$OPENJDK_TARGET_CPU_ENDIAN" = xlittle; then + OPENJDK_TARGET_CPU_JLI_CFLAGS="$OPENJDK_TARGET_CPU_JLI_CFLAGS -DLIBARCH32NAME='\"mips32el\"' -DLIBARCH64NAME='\"mips64el\"'" + fi AC_SUBST(OPENJDK_TARGET_CPU_JLI_CFLAGS) # Setup OPENJDK_TARGET_OS_API_DIR, used in source paths. @@ -550,3 +578,46 @@ AC_DEFUN_ONCE([PLATFORM_SETUP_OPENJDK_TARGET_ENDIANNESS], AC_MSG_ERROR([The tested endian in the target ($ENDIAN) differs from the endian expected to be found in the target ($OPENJDK_TARGET_CPU_ENDIAN)]) fi ]) + +AC_DEFUN([GET_BUILDER_AND_HOST_DATA], +[ +BUILDER_NAME="$build_os" +BUILDER_ID="Custom build ($(date))" +if test -f /etc/issue; then + etc_issue_info=`cat /etc/issue` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/issue | head -n 1 | cut -d " " -f 1` + fi +fi +if test -f /etc/redhat-release; then + etc_issue_info=`cat /etc/redhat-release` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/redhat-release | head -n 1 | cut -d " " -f 1` + fi +fi +if test -f /etc/neokylin-release; then + etc_issue_info=`cat /etc/neokylin-release` + if test -n "$etc_issue_info"; then + BUILDER_NAME=`cat /etc/neokylin-release | head -n 1 | cut -d " " -f 1` + fi +fi +if test -z "$BUILDER_NAME"; then + BUILDER_NAME="unknown" +fi +BUILDER_NAME=`echo $BUILDER_NAME | sed -r "s/-//g"` +if test -n "$OPENJDK_TARGET_CPU_OSARCH"; then + HOST_NAME="$OPENJDK_TARGET_CPU_OSARCH" +else + HOST_NAME="unknown" +fi +if test -f "/usr/bin/cpp"; then + # gcc_with_arch_info=`gcc -v 2>&1 | grep '\-\-with-arch=' | sed 's/.*--with-arch=//;s/ .*$//'` + gcc_with_arch_info=`cpp -dM /dev/null | grep '\<_MIPS_ARCH\>' | sed 's/^#define _MIPS_ARCH "//;s/"$//'` + if test -n "$gcc_with_arch_info"; then + HOST_NAME="$gcc_with_arch_info" + fi +fi +AC_SUBST(BUILDER_ID) +AC_SUBST(BUILDER_NAME) +AC_SUBST(HOST_NAME) +]) diff --git a/common/autoconf/spec.gmk.in b/common/autoconf/spec.gmk.in index 0263c4dbb30..9d039103086 100644 --- a/common/autoconf/spec.gmk.in +++ b/common/autoconf/spec.gmk.in @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2023. These +# modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + # Configured @DATE_WHEN_CONFIGURED@ to build # for target system @OPENJDK_TARGET_OS@-@OPENJDK_TARGET_CPU@ # (called @OPENJDK_TARGET_AUTOCONF_NAME@ by autoconf) @@ -219,6 +225,23 @@ else endif JRE_RELEASE_VERSION:=$(FULL_VERSION) +# Build OS and host values for use in Loongson OpenJDK release +BUILDER_ID:=@BUILDER_ID@ +BUILDER_NAME:=@BUILDER_NAME@ +HOST_NAME:=@HOST_NAME@ + +# Loongson OpenJDK Version info +VER=8.1.18 +ifeq ($(HOST_NAME), ) + HOST_NAME=unknown +endif +ifeq ($(BUILDER_NAME), ) + BUILDER_NAME=unknown +endif +HOST_NAME_STRING=-$(HOST_NAME) +BUILDER_NAME_STRING=-$(BUILDER_NAME) +LOONGSON_RUNTIME_NAME=Loongson $(VER)$(HOST_NAME_STRING)$(BUILDER_NAME_STRING) + # How to compile the code: release, fastdebug or slowdebug DEBUG_LEVEL:=@DEBUG_LEVEL@ diff --git a/hotspot/agent/make/saenv.sh b/hotspot/agent/make/saenv.sh index ab9a0a431c4..a2de3fc3291 100644 --- a/hotspot/agent/make/saenv.sh +++ b/hotspot/agent/make/saenv.sh @@ -23,6 +23,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # This file sets common environment variables for all SA scripts OS=`uname` @@ -42,6 +48,14 @@ if [ "$OS" = "Linux" ]; then SA_LIBPATH=$STARTDIR/../src/os/linux/amd64:$STARTDIR/linux/amd64 OPTIONS="-Dsa.library.path=$SA_LIBPATH" CPU=amd64 + elif [ "$ARCH" = "mips64" ] ; then + SA_LIBPATH=$STARTDIR/../src/os/linux/mips:$STARTDIR/linux/mips + OPTIONS="-Dsa.library.path=$SA_LIBPATH" + CPU=mips + elif [ "$ARCH" = "loongarch64" ] ; then + SA_LIBPATH=$STARTDIR/../src/os/linux/loongarch64:$STARTDIR/linux/loongarch64 + OPTIONS="-Dsa.library.path=$SA_LIBPATH" + CPU=loongarch64 else SA_LIBPATH=$STARTDIR/../src/os/linux/i386:$STARTDIR/linux/i386 OPTIONS="-Dsa.library.path=$SA_LIBPATH" diff --git a/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c b/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c index d6a0c7d9a93..b3b1380b298 100644 --- a/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c +++ b/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c @@ -22,6 +22,13 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + * + */ + #include #include "libproc.h" @@ -49,10 +56,18 @@ #include "sun_jvm_hotspot_debugger_sparc_SPARCThreadContext.h" #endif +#if defined(mips64el) || defined(mips64) +#include "sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext.h" +#endif + #ifdef aarch64 #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" #endif +#ifdef loongarch64 +#include "sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext.h" +#endif + static jfieldID p_ps_prochandle_ID = 0; static jfieldID threadList_ID = 0; static jfieldID loadObjectList_ID = 0; @@ -337,7 +352,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo return (err == PS_OK)? 
array : 0; } -#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(aarch64) +#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) || defined(aarch64) || defined(loongarch64) JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 (JNIEnv *env, jobject this_obj, jint lwp_id) { @@ -364,6 +379,12 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo #endif #if defined(sparc) || defined(sparcv9) #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG +#endif +#ifdef loongarch64 +#define NPRGREG sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_NPRGREG +#endif +#if defined(mips64) || defined(mips64el) +#define NPRGREG sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_NPRGREG #endif array = (*env)->NewLongArray(env, NPRGREG); @@ -470,6 +491,55 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo } #endif /* aarch64 */ +#if defined(loongarch64) + +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_loongarch64_LOONGARCH64ThreadContext_##reg + + { + int i; + for (i = 0; i < 31; i++) + regs[i] = gregs.regs[i]; + regs[REG_INDEX(PC)] = gregs.csr_era; + } +#endif /* loongarch64 */ +#if defined(mips64) || defined(mips64el) + +#define REG_INDEX(reg) sun_jvm_hotspot_debugger_mips64_MIPS64ThreadContext_##reg + + regs[REG_INDEX(ZERO)] = gregs.regs[0]; + regs[REG_INDEX(AT)] = gregs.regs[1]; + regs[REG_INDEX(V0)] = gregs.regs[2]; + regs[REG_INDEX(V1)] = gregs.regs[3]; + regs[REG_INDEX(A0)] = gregs.regs[4]; + regs[REG_INDEX(A1)] = gregs.regs[5]; + regs[REG_INDEX(A2)] = gregs.regs[6]; + regs[REG_INDEX(A3)] = gregs.regs[7]; + regs[REG_INDEX(T0)] = gregs.regs[8]; + regs[REG_INDEX(T1)] = gregs.regs[9]; + regs[REG_INDEX(T2)] = gregs.regs[10]; + regs[REG_INDEX(T3)] = gregs.regs[11]; + regs[REG_INDEX(T4)] = gregs.regs[12]; + regs[REG_INDEX(T5)] = gregs.regs[13]; + regs[REG_INDEX(T6)] = gregs.regs[14]; + regs[REG_INDEX(T7)] = gregs.regs[15]; + regs[REG_INDEX(S0)] = gregs.regs[16]; + regs[REG_INDEX(S1)] = gregs.regs[17]; + regs[REG_INDEX(S2)] = gregs.regs[18]; + regs[REG_INDEX(S3)] = gregs.regs[19]; + regs[REG_INDEX(S4)] = gregs.regs[20]; + regs[REG_INDEX(S5)] = gregs.regs[21]; + regs[REG_INDEX(S6)] = gregs.regs[22]; + regs[REG_INDEX(S7)] = gregs.regs[23]; + regs[REG_INDEX(T8)] = gregs.regs[24]; + regs[REG_INDEX(T9)] = gregs.regs[25]; + regs[REG_INDEX(K0)] = gregs.regs[26]; + regs[REG_INDEX(K1)] = gregs.regs[27]; + regs[REG_INDEX(GP)] = gregs.regs[28]; + regs[REG_INDEX(SP)] = gregs.regs[29]; + regs[REG_INDEX(FP)] = gregs.regs[30]; + regs[REG_INDEX(S8)] = gregs.regs[30]; + regs[REG_INDEX(RA)] = gregs.regs[31]; +#endif /* mips64 */ (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); return array; diff --git a/hotspot/agent/src/os/linux/Makefile b/hotspot/agent/src/os/linux/Makefile index c0b5c869c1f..2cc50b6fab1 100644 --- a/hotspot/agent/src/os/linux/Makefile +++ b/hotspot/agent/src/os/linux/Makefile @@ -22,7 +22,13 @@ # # -ARCH := $(shell if ([ `uname -m` = "ia64" ]) ; then echo ia64 ; elif ([ `uname -m` = "x86_64" ]) ; then echo amd64; elif ([ `uname -m` = "sparc64" ]) ; then echo sparc; else echo i386 ; fi ) +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + +ARCH := $(shell if ([ `uname -m` = "ia64" ]) ; then echo ia64 ; elif ([ `uname -m` = "mips64el" ]) ; then echo mips64 ; elif ([ `uname -m` = "x86_64" ]) ; then echo amd64; elif ([ `uname -m` = "sparc64" ]) ; then echo sparc; else echo i386 ; fi ) GCC = gcc JAVAH = ${JAVA_HOME}/bin/javah @@ -53,6 +59,8 @@ $(ARCH)/LinuxDebuggerLocal.o: LinuxDebuggerLocal.c $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \ sun.jvm.hotspot.debugger.x86.X86ThreadContext \ sun.jvm.hotspot.debugger.sparc.SPARCThreadContext \ + sun.jvm.hotspot.debugger.mips64.MIPS64ThreadContext \ + sun.jvm.hotspot.debugger.loongarch64.LOONGARCH64ThreadContext \ sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext \ sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext $(GCC) $(CFLAGS) $< -o $@ diff --git a/hotspot/agent/src/os/linux/libproc.h b/hotspot/agent/src/os/linux/libproc.h index 6b6e41cab47..5eb8211aa93 100644 --- a/hotspot/agent/src/os/linux/libproc.h +++ b/hotspot/agent/src/os/linux/libproc.h @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef _LIBPROC_H_ #define _LIBPROC_H_ @@ -36,7 +42,7 @@ #include -#if defined(aarch64) +#if defined(aarch64) || defined(loongarch64) #include "asm/ptrace.h" #endif @@ -76,7 +82,12 @@ combination of ptrace and /proc calls. #include #define user_regs_struct pt_regs #endif -#if defined(aarch64) + +#if defined(mips) || defined(mipsel) || defined(mips64) || defined(mips64el) +#include +#define user_regs_struct pt_regs +#endif +#if defined(aarch64) || defined(loongarch64) #define user_regs_struct user_pt_regs #endif diff --git a/hotspot/agent/src/os/linux/ps_proc.c b/hotspot/agent/src/os/linux/ps_proc.c index c4d6a9ecc5d..7000e927235 100644 --- a/hotspot/agent/src/os/linux/ps_proc.c +++ b/hotspot/agent/src/os/linux/ps_proc.c @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include #include #include @@ -141,7 +147,7 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use #define PTRACE_GETREGS_REQ PT_GETREGS #endif -#ifdef PTRACE_GETREGS_REQ +#if defined(PTRACE_GETREGS_REQ) && !defined(loongarch64) if (ptrace_getregs(PTRACE_GETREGS_REQ, pid, user, NULL) < 0) { print_debug("ptrace(PTRACE_GETREGS, ...) failed for lwp %d\n", pid); return false; diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java index c963350591d..20e6f35b9cf 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ * + */ package sun.jvm.hotspot; import java.rmi.RemoteException; @@ -37,6 +43,8 @@ import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; +import sun.jvm.hotspot.debugger.MachineDescriptionMIPS64; +import sun.jvm.hotspot.debugger.MachineDescriptionLOONGARCH64; import sun.jvm.hotspot.debugger.NoSuchSymbolException; import sun.jvm.hotspot.debugger.bsd.BsdDebuggerLocal; import sun.jvm.hotspot.debugger.linux.LinuxDebuggerLocal; @@ -594,6 +602,10 @@ private void setupDebuggerLinux() { } else { machDesc = new MachineDescriptionSPARC32Bit(); } + } else if (cpu.equals("mips64")) { + machDesc = new MachineDescriptionMIPS64(); + } else if (cpu.equals("loongarch64")) { + machDesc = new MachineDescriptionLOONGARCH64(); } else { try { machDesc = (MachineDescription) diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java index 993bf7bb477..1e075aa57ee 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/asm/Disassembler.java @@ -94,6 +94,12 @@ private Disassembler(long startPc, byte[] code) { } else if (arch.equals("amd64") || arch.equals("x86_64")) { path.append(sep + "lib" + sep + "amd64" + sep); libname += "-amd64.so"; + } else if (arch.equals("mips64") || arch.equals("mips64el")) { + path.append(sep + "lib" + sep + "mips64" + sep); + libname += "-mips64.so"; + } else if (arch.equals("loongarch64")) { + path.append(sep + "lib" + sep + "loongarch64" + sep); + libname += "-loongarch64.so"; } else { path.append(sep + "lib" + sep + arch + sep); libname += "-" + arch + ".so"; diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java new file mode 100644 index 00000000000..0531427dabb --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionLOONGARCH64.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionLOONGARCH64 extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 8; + } + + + public boolean isBigEndian() { + return false; + } + + public boolean isLP64() { + return true; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java new file mode 100644 index 00000000000..1b49efd2017 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionMIPS64.java @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionMIPS64 extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 8; + } + + + public boolean isBigEndian() { + return "big".equals(System.getProperty("sun.cpu.endian")); + } + + public boolean isLP64() { + return true; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java index f178d6a6e7f..019e794bbb4 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -32,11 +32,15 @@ import sun.jvm.hotspot.debugger.x86.*; import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.sparc.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.loongarch64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.linux.aarch64.*; import sun.jvm.hotspot.debugger.linux.sparc.*; +import sun.jvm.hotspot.debugger.linux.mips64.*; +import sun.jvm.hotspot.debugger.linux.loongarch64.*; import sun.jvm.hotspot.utilities.*; class LinuxCDebugger implements CDebugger { @@ -106,6 +110,20 @@ public CFrame topFrameForThread(ThreadProxy thread) throws DebuggerException { Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); if (pc == null) return null; return new LinuxAARCH64CFrame(dbg, fp, pc); + } else if (cpu.equals("mips64")) { + MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); + if (sp == null) return null; + Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); + if (pc == null) return null; + return new LinuxMIPS64CFrame(dbg, sp, pc); + } else if (cpu.equals("loongarch64")) { + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + if (sp == null) return null; + Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxLOONGARCH64CFrame(dbg, sp, pc); } else { // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu ThreadContext context = (ThreadContext) thread.getContext(); diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java index 44c2265d7a0..3b6747ac0a3 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java @@ -30,6 +30,8 @@ import sun.jvm.hotspot.debugger.linux.ia64.*; import sun.jvm.hotspot.debugger.linux.x86.*; import sun.jvm.hotspot.debugger.linux.sparc.*; +import sun.jvm.hotspot.debugger.linux.mips64.*; +import sun.jvm.hotspot.debugger.linux.loongarch64.*; class LinuxThreadContextFactory { static ThreadContext createThreadContext(LinuxDebugger dbg) { @@ -42,6 +44,10 @@ static ThreadContext createThreadContext(LinuxDebugger dbg) { return new LinuxIA64ThreadContext(dbg); } else if (cpu.equals("sparc")) { return new LinuxSPARCThreadContext(dbg); + } else if 
(cpu.equals("mips64")) { + return new LinuxMIPS64ThreadContext(dbg); + } else if (cpu.equals("loongarch64")) { + return new LinuxLOONGARCH64ThreadContext(dbg); } else { try { Class tcc = Class.forName("sun.jvm.hotspot.debugger.linux." + diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java new file mode 100644 index 00000000000..3b20dbbd87e --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64CFrame.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; +import sun.jvm.hotspot.debugger.loongarch64.*; + +final public class LinuxLOONGARCH64CFrame extends BasicCFrame { + // package/class internals only + public LinuxLOONGARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) { + super(dbg.getCDebugger()); + this.fp = fp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. 
+ return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return fp; + } + + public CFrame sender(ThreadProxy thread) { + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) thread.getContext(); + Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + + if ((fp == null) || fp.lessThan(sp)) { + return null; + } + + Address nextFP = fp.getAddressAt(-2 * ADDRESS_SIZE); + if (nextFP == null) { + return null; + } + Address nextPC = fp.getAddressAt(-1 * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxLOONGARCH64CFrame(dbg, nextFP, nextPC); + } + + private static final int ADDRESS_SIZE = 8; + private Address pc; + private Address fp; + private LinuxDebugger dbg; +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..9f22133eaff --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/loongarch64/LinuxLOONGARCH64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { + private LinuxDebugger debugger; + + public LinuxLOONGARCH64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java new file mode 100644 index 00000000000..2e3eb564da2 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64CFrame.java @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.linux.*; +import sun.jvm.hotspot.debugger.cdbg.*; +import sun.jvm.hotspot.debugger.cdbg.basic.*; +import sun.jvm.hotspot.debugger.mips64.*; + +final public class LinuxMIPS64CFrame extends BasicCFrame { + // package/class internals only + public LinuxMIPS64CFrame(LinuxDebugger dbg, Address ebp, Address pc) { + super(dbg.getCDebugger()); + this.ebp = ebp; + this.pc = pc; + this.dbg = dbg; + } + + // override base class impl to avoid ELF parsing + public ClosestSymbol closestSymbolToPC() { + // try native lookup in debugger. + return dbg.lookup(dbg.getAddressValue(pc())); + } + + public Address pc() { + return pc; + } + + public Address localVariableBase() { + return ebp; + } + + public CFrame sender(ThreadProxy thread) { + MIPS64ThreadContext context = (MIPS64ThreadContext) thread.getContext(); + Address esp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); + + if ( (ebp == null) || ebp.lessThan(esp) ) { + return null; + } + + Address nextEBP = ebp.getAddressAt( 0 * ADDRESS_SIZE); + if (nextEBP == null) { + return null; + } + Address nextPC = ebp.getAddressAt( 1 * ADDRESS_SIZE); + if (nextPC == null) { + return null; + } + return new LinuxMIPS64CFrame(dbg, nextEBP, nextPC); + } + + private static final int ADDRESS_SIZE = 4; + private Address pc; + private Address ebp; + private LinuxDebugger dbg; +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java new file mode 100644 index 00000000000..98e0f3f0bcf --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/mips64/LinuxMIPS64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.linux.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.linux.*; + +public class LinuxMIPS64ThreadContext extends MIPS64ThreadContext { + private LinuxDebugger debugger; + + public LinuxMIPS64ThreadContext(LinuxDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..90b0cf97e35 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/loongarch64/LOONGARCH64ThreadContext.java @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on loongarch64 platforms; only a sub-portion + of the context is guaranteed to be present on all operating + systems. */ + +public abstract class LOONGARCH64ThreadContext implements ThreadContext { + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work): EAX, EBX, + // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. 
+ + public static final int ZERO = 0; + public static final int RA = 1; + public static final int TP = 2; + public static final int SP = 3; + public static final int A0 = 4; + public static final int A1 = 5; + public static final int A2 = 6; + public static final int A3 = 7; + public static final int A4 = 8; + public static final int A5 = 9; + public static final int A6 = 10; + public static final int A7 = 11; + public static final int T0 = 12; + public static final int T1 = 13; + public static final int T2 = 14; + public static final int T3 = 15; + public static final int T4 = 16; + public static final int T5 = 17; + public static final int T6 = 18; + public static final int T7 = 19; + public static final int T8 = 20; + public static final int RX = 21; + public static final int FP = 22; + public static final int S0 = 23; + public static final int S1 = 24; + public static final int S2 = 25; + public static final int S3 = 26; + public static final int S4 = 27; + public static final int S5 = 28; + public static final int S6 = 29; + public static final int S7 = 30; + public static final int S8 = 31; + public static final int PC = 32; + public static final int NPRGREG = 33; + + private static final String[] regNames = { + "ZERO", "RA", "TP", "SP", + "A0", "A1", "A2", "A3", + "A4", "A5", "A6", "A7", + "T0", "T1", "T2", "T3", + "T4", "T5", "T6", "T7", + "T8", "RX", "FP", "S0", + "S1", "S2", "S3", "S4", + "S5", "S6", "S7", "S8", + "PC" + }; + + private long[] data; + + public LOONGARCH64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + return regNames[index]; + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java new file mode 100644 index 00000000000..c57ee9dfc97 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/mips64/MIPS64ThreadContext.java @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.cdbg.*; + +/** Specifies the thread context on mips64 platforms; only a sub-portion + of the context is guaranteed to be present on all operating + systems. */ + +public abstract class MIPS64ThreadContext implements ThreadContext { + + // NOTE: the indices for the various registers must be maintained as + // listed across various operating systems. However, only a small + // subset of the registers' values are guaranteed to be present (and + // must be present for the SA's stack walking to work): EAX, EBX, + // ECX, EDX, ESI, EDI, EBP, ESP, and EIP. + + public static final int ZERO = 0; + public static final int AT = 1; + public static final int V0 = 2; + public static final int V1 = 3; + public static final int A0 = 4; + public static final int A1 = 5; + public static final int A2 = 6; + public static final int A3 = 7; + public static final int T0 = 8; + public static final int T1 = 9; + public static final int T2 = 10; + public static final int T3 = 11; + public static final int T4 = 12; + public static final int T5 = 13; + public static final int T6 = 14; + public static final int T7 = 15; + public static final int S0 = 16; + public static final int S1 = 17; + public static final int S2 = 18; + public static final int S3 = 19; + public static final int S4 = 20; + public static final int S5 = 21; + public static final int S6 = 22; + public static final int S7 = 23; + public static final int T8 = 24; + public static final int T9 = 25; + public static final int K0 = 26; + public static final int K1 = 27; + public static final int GP = 28; + public static final int SP = 29; + public static final int FP = 30; + public static final int RA = 31; + public static final int PC = 32; + public static final int NPRGREG = 33; + + private static final String[] regNames = { + "ZERO", "AT", "V0", "V1", + "A0", "A1", "A2", "A3", + "T0", "T1", "T2", "T3", + "T4", "T5", "T6", "T7", + "S0", "S1", "S2", "S3", + "S4", "S5", "S6", "S7", + "T8", "T9", "K0", "K1", + "GP", "SP", "FP", "RA", + "PC" + }; + + private long[] data; + + public MIPS64ThreadContext() { + data = new long[NPRGREG]; + } + + public int getNumRegisters() { + return NPRGREG; + } + + public String getRegisterName(int index) { + return regNames[index]; + } + + public void setRegister(int index, long value) { + data[index] = value; + } + + public long getRegister(int index) { + return data[index]; + } + + public CFrame getTopFrame(Debugger dbg) { + return null; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract void setRegisterAsAddress(int index, Address value); + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public abstract Address getRegisterAsAddress(int index); +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java 
index 7113a3a497b..24273888c2d 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/posix/elf/ELFHeader.java @@ -63,6 +63,8 @@ public interface ELFHeader { public static final int ARCH_i860 = 7; /** MIPS architecture type. */ public static final int ARCH_MIPS = 8; + /** LOONGARCH architecture type. */ + public static final int ARCH_LOONGARCH = 9; /** Returns a file type which is defined by the file type constants. */ public short getFileType(); diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java index ca1a2575ff4..2afa6c55f88 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java @@ -34,10 +34,14 @@ import sun.jvm.hotspot.debugger.proc.aarch64.*; import sun.jvm.hotspot.debugger.proc.sparc.*; import sun.jvm.hotspot.debugger.proc.x86.*; +import sun.jvm.hotspot.debugger.proc.mips64.*; +import sun.jvm.hotspot.debugger.proc.loongarch64.*; import sun.jvm.hotspot.debugger.amd64.*; import sun.jvm.hotspot.debugger.aarch64.*; import sun.jvm.hotspot.debugger.sparc.*; import sun.jvm.hotspot.debugger.x86.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.loongarch64.*; import sun.jvm.hotspot.utilities.*; /**
An implementation of the JVMDebugger interface which sits on @@ -92,6 +96,14 @@ public ProcDebuggerLocal(MachineDescription machDesc, boolean useCache) { threadFactory = new ProcAARCH64ThreadFactory(this); pcRegIndex = AARCH64ThreadContext.PC; fpRegIndex = AARCH64ThreadContext.FP; + } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { + threadFactory = new ProcMIPS64ThreadFactory(this); + pcRegIndex = MIPS64ThreadContext.PC; + fpRegIndex = MIPS64ThreadContext.FP; + } else if (cpu.equals("loongarch64")) { + threadFactory = new ProcLOONGARCH64ThreadFactory(this); + pcRegIndex = LOONGARCH64ThreadContext.PC; + fpRegIndex = LOONGARCH64ThreadContext.FP; } else { try { Class tfc = Class.forName("sun.jvm.hotspot.debugger.proc." + diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java new file mode 100644 index 00000000000..42a31e3486c --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64Thread.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcLOONGARCH64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcLOONGARCH64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. + this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcLOONGARCH64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcLOONGARCH64ThreadContext context = new ProcLOONGARCH64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + /* + _NGREG in reg.h is defined to be 19. Because we have included + debug registers LOONGARCH64ThreadContext.NPRGREG is 25. 
+ */ + + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length <= LOONGARCH64ThreadContext.NPRGREG, "size of register set is greater than " + LOONGARCH64ThreadContext.NPRGREG); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcLOONGARCH64Thread)) { + return false; + } + + return (((ProcLOONGARCH64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..9054f16506a --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { + private ProcDebugger debugger; + + public ProcLOONGARCH64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java new file mode 100644 index 00000000000..bc643351244 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/loongarch64/ProcLOONGARCH64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcLOONGARCH64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcLOONGARCH64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcLOONGARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcLOONGARCH64Thread(debugger, id); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java new file mode 100644 index 00000000000..5c1e0be8932 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64Thread.java @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.proc.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.proc.*; +import sun.jvm.hotspot.utilities.*; + +public class ProcMIPS64Thread implements ThreadProxy { + private ProcDebugger debugger; + private int id; + + public ProcMIPS64Thread(ProcDebugger debugger, Address addr) { + this.debugger = debugger; + + // FIXME: the size here should be configurable. However, making it + // so would produce a dependency on the "types" package from the + // debugger package, which is not desired. + this.id = (int) addr.getCIntegerAt(0, 4, true); + } + + public ProcMIPS64Thread(ProcDebugger debugger, long id) { + this.debugger = debugger; + this.id = (int) id; + } + + public ThreadContext getContext() throws IllegalThreadStateException { + ProcMIPS64ThreadContext context = new ProcMIPS64ThreadContext(debugger); + long[] regs = debugger.getThreadIntegerRegisterSet(id); + /* + _NGREG in reg.h is defined to be 19. Because we have included + debug registers MIPS64ThreadContext.NPRGREG is 25. + */ + + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length <= MIPS64ThreadContext.NPRGREG, "size of register set is greater than " + MIPS64ThreadContext.NPRGREG); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } + + public boolean canSetContext() throws DebuggerException { + return false; + } + + public void setContext(ThreadContext context) + throws IllegalThreadStateException, DebuggerException { + throw new DebuggerException("Unimplemented"); + } + + public String toString() { + return "t@" + id; + } + + public boolean equals(Object obj) { + if ((obj == null) || !(obj instanceof ProcMIPS64Thread)) { + return false; + } + + return (((ProcMIPS64Thread) obj).id == id); + } + + public int hashCode() { + return id; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java new file mode 100644 index 00000000000..d44223d768a --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadContext.java @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2002, 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.proc.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcMIPS64ThreadContext extends MIPS64ThreadContext { + private ProcDebugger debugger; + + public ProcMIPS64ThreadContext(ProcDebugger debugger) { + super(); + this.debugger = debugger; + } + + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java new file mode 100644 index 00000000000..bad478fc5ca --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/mips64/ProcMIPS64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.proc.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.proc.*; + +public class ProcMIPS64ThreadFactory implements ProcThreadFactory { + private ProcDebugger debugger; + + public ProcMIPS64ThreadFactory(ProcDebugger debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new ProcMIPS64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new ProcMIPS64Thread(debugger, id); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java index ffa61b548e7..9cf3ee2da33 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java @@ -33,6 +33,8 @@ import sun.jvm.hotspot.debugger.remote.sparc.*; import sun.jvm.hotspot.debugger.remote.x86.*; import sun.jvm.hotspot.debugger.remote.amd64.*; +import sun.jvm.hotspot.debugger.remote.mips64.*; +import sun.jvm.hotspot.debugger.remote.loongarch64.*; /** An implementation of Debugger which wraps a RemoteDebugger, providing remote debugging via RMI. 
@@ -70,6 +72,16 @@ public RemoteDebuggerClient(RemoteDebugger remoteDebugger) throws DebuggerExcept cachePageSize = 4096; cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); unalignedAccessesOkay = true; + } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { + threadFactory = new RemoteMIPS64ThreadFactory(this); + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; + } else if (cpu.equals("loongarch64")) { + threadFactory = new RemoteLOONGARCH64ThreadFactory(this); + cachePageSize = 4096; + cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize); + unalignedAccessesOkay = true; } else { try { Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." + diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java new file mode 100644 index 00000000000..01e3f8954bb --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64Thread.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteLOONGARCH64Thread extends RemoteThread { + public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteLOONGARCH64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteLOONGARCH64ThreadContext context = new RemoteLOONGARCH64ThreadContext(debugger); + long[] regs = (addr != null)? 
debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == LOONGARCH64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java new file mode 100644 index 00000000000..ad25bccc8d2 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadContext.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteLOONGARCH64ThreadContext extends LOONGARCH64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteLOONGARCH64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java new file mode 100644 index 00000000000..d8bf50ea5ba --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/loongarch64/RemoteLOONGARCH64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger.remote.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteLOONGARCH64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteLOONGARCH64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteLOONGARCH64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteLOONGARCH64Thread(debugger, id); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java new file mode 100644 index 00000000000..a9285a3b946 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64Thread.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.remote.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.remote.*; +import sun.jvm.hotspot.utilities.*; + +public class RemoteMIPS64Thread extends RemoteThread { + public RemoteMIPS64Thread(RemoteDebuggerClient debugger, Address addr) { + super(debugger, addr); + } + + public RemoteMIPS64Thread(RemoteDebuggerClient debugger, long id) { + super(debugger, id); + } + + public ThreadContext getContext() throws IllegalThreadStateException { + RemoteMIPS64ThreadContext context = new RemoteMIPS64ThreadContext(debugger); + long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : + debugger.getThreadIntegerRegisterSet(id); + if (Assert.ASSERTS_ENABLED) { + Assert.that(regs.length == MIPS64ThreadContext.NPRGREG, "size of register set must match"); + } + for (int i = 0; i < regs.length; i++) { + context.setRegister(i, regs[i]); + } + return context; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java new file mode 100644 index 00000000000..4d711f9ba7c --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadContext.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.remote.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteMIPS64ThreadContext extends MIPS64ThreadContext { + private RemoteDebuggerClient debugger; + + public RemoteMIPS64ThreadContext(RemoteDebuggerClient debugger) { + super(); + this.debugger = debugger; + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public void setRegisterAsAddress(int index, Address value) { + setRegister(index, debugger.getAddressValue(value)); + } + + /** This can't be implemented in this class since we would have to + tie the implementation to, for example, the debugging system */ + public Address getRegisterAsAddress(int index) { + return debugger.newAddress(getRegister(index)); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java new file mode 100644 index 00000000000..020a2f1ff96 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/mips64/RemoteMIPS64ThreadFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.debugger.remote.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.remote.*; + +public class RemoteMIPS64ThreadFactory implements RemoteThreadFactory { + private RemoteDebuggerClient debugger; + + public RemoteMIPS64ThreadFactory(RemoteDebuggerClient debugger) { + this.debugger = debugger; + } + + public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { + return new RemoteMIPS64Thread(debugger, threadIdentifierAddr); + } + + public ThreadProxy createThreadWrapper(long id) { + return new RemoteMIPS64Thread(debugger, id); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java index 842a3b357dd..81efdd02f86 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -34,6 +34,8 @@ import sun.jvm.hotspot.runtime.win32_x86.Win32X86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_mips64.LinuxMIPS64JavaThreadPDAccess; +import sun.jvm.hotspot.runtime.linux_loongarch64.LinuxLOONGARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; @@ -90,6 +92,10 @@ private static synchronized void initialize(TypeDataBase db) { access = new LinuxSPARCJavaThreadPDAccess(); } else if (cpu.equals("aarch64")) { access = new LinuxAARCH64JavaThreadPDAccess(); + } else if (cpu.equals("mips64")) { + access = new LinuxMIPS64JavaThreadPDAccess(); + } else if (cpu.equals("loongarch64")) { + access = new LinuxLOONGARCH64JavaThreadPDAccess(); } else { try { access = (JavaThreadPDAccess) diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java new file mode 100644 index 00000000000..77c45c2e998 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_loongarch64/LinuxLOONGARCH64JavaThreadPDAccess.java @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.linux_loongarch64; + +import java.io.*; +import java.util.*; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.loongarch64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +public class LinuxLOONGARCH64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new LOONGARCH64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new LOONGARCH64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); + LOONGARCH64CurrentFrameGuess guesser = new LOONGARCH64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new LOONGARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); + // tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + LOONGARCH64ThreadContext context = (LOONGARCH64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. 
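+    // The lookup takes two hops: JavaThread._osthread yields the OSThread, and the
+    // address of OSThread._thread_id is handed to the debugger, which maps it back to a ThreadProxy.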
+ // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java new file mode 100644 index 00000000000..a0fd73fa673 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_mips64/LinuxMIPS64JavaThreadPDAccess.java @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.linux_mips64; + +import java.io.*; +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.runtime.mips64.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +public class LinuxMIPS64JavaThreadPDAccess implements JavaThreadPDAccess { + private static AddressField lastJavaFPField; + private static AddressField osThreadField; + + // Field from OSThread + private static CIntegerField osThreadThreadIDField; + + // This is currently unneeded but is being kept in case we change + // the currentFrameGuess algorithm + private static final long GUESS_SCAN_RANGE = 128 * 1024; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaThread"); + osThreadField = type.getAddressField("_osthread"); + + Type anchorType = db.lookupType("JavaFrameAnchor"); + lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); + + Type osThreadType = db.lookupType("OSThread"); + osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); + } + + public Address getLastJavaFP(Address addr) { + return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); + } + + public Address getLastJavaPC(Address addr) { + return null; + } + + public Address getBaseOfStackPointer(Address addr) { + return null; + } + + public Frame getLastFramePD(JavaThread thread, Address addr) { + Address fp = thread.getLastJavaFP(); + if (fp == null) { + return null; // no information + } + return new MIPS64Frame(thread.getLastJavaSP(), fp); + } + + public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { + return new MIPS64RegisterMap(thread, updateMap); + } + + public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { + ThreadProxy t = getThreadProxy(addr); + MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); + MIPS64CurrentFrameGuess guesser = new MIPS64CurrentFrameGuess(context, thread); + if (!guesser.run(GUESS_SCAN_RANGE)) { + return null; + } + if (guesser.getPC() == null) { + return new MIPS64Frame(guesser.getSP(), guesser.getFP()); + } else { + return new MIPS64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); + } + } + + public void printThreadIDOn(Address addr, PrintStream tty) { + tty.print(getThreadProxy(addr)); + } + + public void printInfoOn(Address threadAddr, PrintStream tty) { + tty.print("Thread id: "); + printThreadIDOn(threadAddr, tty); +// tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); + } + + public Address getLastSP(Address addr) { + ThreadProxy t = getThreadProxy(addr); + MIPS64ThreadContext context = (MIPS64ThreadContext) t.getContext(); + return context.getRegisterAsAddress(MIPS64ThreadContext.SP); + } + + public ThreadProxy getThreadProxy(Address addr) { + // Addr is the address of the JavaThread. 
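+    // The lookup takes two hops: JavaThread._osthread yields the OSThread, and the
+    // address of OSThread._thread_id is handed to the debugger, which maps it back to a ThreadProxy.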
+ // Fetch the OSThread (for now and for simplicity, not making a + // separate "OSThread" class in this package) + Address osThreadAddr = osThreadField.getValue(addr); + // Get the address of the _thread_id from the OSThread + Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); + + JVMDebugger debugger = VM.getVM().getDebugger(); + return debugger.getThreadForIdentifierAddress(threadIdAddr); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java new file mode 100644 index 00000000000..0208e6e2241 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64CurrentFrameGuess.java @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.loongarch64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; + +/**

Should be able to be used on all loongarch64 platforms we support + (currently Linux/loongarch64) to implement JavaThread's + "currentFrameGuess()" functionality. Input is a LOONGARCH64ThreadContext; + output is SP, FP, and PC for a LOONGARCH64Frame. Instantiation of the + LOONGARCH64Frame is left to the caller, since we may need to subclass + LOONGARCH64Frame to support signal handler frames on Unix platforms.

+ +

Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP + (frame pointer). We repeat this until we either find a complete frame + or run out of stack to look at.
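+    (In this port, LinuxLOONGARCH64JavaThreadPDAccess passes a scan range of
+    GUESS_SCAN_RANGE (128K) to run(), and the walk advances one machine word,
+    VM.getAddressSize(), per step.)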

*/ + +public class LOONGARCH64CurrentFrameGuess { + private LOONGARCH64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") + != null; + + public LOONGARCH64CurrentFrameGuess(LOONGARCH64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(LOONGARCH64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(LOONGARCH64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame eithe + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + // Bail out + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable EBP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from ESP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new LOONGARCH64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. 
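+                  // (An entry frame with entryFrameIsFirst() true is the bottommost Java frame,
+                  //  so the whole sender chain from (curSP, pc) was walkable; curSP is accepted
+                  //  and FP is deliberately left null in setValues below.)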
+ if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + + /* + // Original algorithm which does not work because SP was + // pointing beyond where it should have: + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. We see whether the PC is in the + // interpreter and take care of that, otherwise we run code + // (unfortunately) duplicated from LOONGARCH64Frame.senderForCompiledFrame. + + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + + // See if we can derive a frame pointer from SP and PC + // NOTE: This is the code duplicated from LOONGARCH64Frame + Address saved_fp = null; + int llink_offset = cb.getLinkOffset(); + if (llink_offset >= 0) { + // Restore base-pointer, since next frame might be an interpreter frame. + Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); + saved_fp = fp_addr.getAddressAt(0); + } + + setValues(sp, saved_fp, pc); + return true; + } + */ + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved ESP and + // EBP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct LOONGARCH64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java new file mode 100644 index 00000000000..fdf0c79c1ac --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64Frame.java @@ -0,0 +1,534 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the loongarch64 family of CPUs. */ + +public class LOONGARCH64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.loongarch64.LOONGARCH64Frame.DEBUG") != null; + } + + // Java frames + private static final int JAVA_FRAME_LINK_OFFSET = 0; + private static final int JAVA_FRAME_RETURN_ADDR_OFFSET = 1; + private static final int JAVA_FRAME_SENDER_SP_OFFSET = 2; + + // Native frames + private static final int NATIVE_FRAME_LINK_OFFSET = -2; + private static final int NATIVE_FRAME_RETURN_ADDR_OFFSET = -1; + private static final int NATIVE_FRAME_SENDER_SP_OFFSET = 0; + + // Interpreter frames + private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + private static final int INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; + private static final int INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + private static final int INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + private static final int INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + private static final int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static final int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + + // Entry frames + private static final int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -9; + + // Native frames + private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg fp = new VMReg(22 << 1); + + // an additional field beyond sp and pc: + Address raw_fp; // frame pointer + private Address raw_unextendedSP; + + private LOONGARCH64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if 
(Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. + pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public LOONGARCH64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("LOONGARCH64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public LOONGARCH64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = raw_fp.getAddressAt(1 * VM.getVM().getAddressSize()); + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("LOONGARCH64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public LOONGARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("LOONGARCH64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + LOONGARCH64Frame frame = new LOONGARCH64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof LOONGARCH64Frame)) { + return false; + } + + LOONGARCH64Frame other = (LOONGARCH64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? "null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet (should be done for Solaris/LOONGARCH) + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. 
+ // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + // FIXME: not applicable in current system + // void patch_pc(Thread* thread, address pc); + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + LOONGARCH64RegisterMap map = (LOONGARCH64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return new LOONGARCH64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(LOONGARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + LOONGARCH64JavaCallWrapper jcw = (LOONGARCH64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + LOONGARCH64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new LOONGARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // On loongarch, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original PC. + if (senderNm.isDeoptEntry(getPC()) || + senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); + } + } + } + + private Frame senderForInterpreterFrame(LOONGARCH64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = getSenderSP(); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. 
With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. + + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(JAVA_FRAME_LINK_OFFSET)); + + return new LOONGARCH64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(fp, savedFPAddr); + } + + private Frame senderForCompiledFrame(LOONGARCH64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated in LOONGARCH64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // On Intel the return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of EBP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame (or C1?). + Address savedFPAddr = senderSP.addOffsetTo(- JAVA_FRAME_SENDER_SP_OFFSET * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + OopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of EBP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + updateMapWithSavedLink(map, savedFPAddr); + } + + return new LOONGARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + // FIXME + // Check for null ebp? Need to do some tests. + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + if (isJavaFrame()) + return addressOfStackSlot(JAVA_FRAME_LINK_OFFSET).getAddressAt(0); + return addressOfStackSlot(NATIVE_FRAME_LINK_OFFSET).getAddressAt(0); + } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { + if (isJavaFrame()) + return addressOfStackSlot(JAVA_FRAME_RETURN_ADDR_OFFSET); + return addressOfStackSlot(NATIVE_FRAME_RETURN_ADDR_OFFSET); + } + + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + public Address getSenderSP() { + if (isJavaFrame()) + return addressOfStackSlot(JAVA_FRAME_SENDER_SP_OFFSET); + return addressOfStackSlot(NATIVE_FRAME_SENDER_SP_OFFSET); + } + + // return address of param, zero origin index. 
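+  // Parameter idx maps to stack slot NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx
+  // (slot 2 + idx in this frame layout).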
+ public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. + Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // FIXME + //inline int frame::interpreter_frame_monitor_size() { + // return BasicObjectLock::size(); + //} + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new LOONGARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + if (getFP() != null) { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } else { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java new file mode 100644 index 00000000000..f7dbbcaacd9 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64JavaCallWrapper.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; + +public class LOONGARCH64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public LOONGARCH64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java new file mode 100644 index 00000000000..021ef523e31 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/loongarch64/LOONGARCH64RegisterMap.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.loongarch64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class LOONGARCH64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public LOONGARCH64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected LOONGARCH64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + LOONGARCH64RegisterMap retval = new LOONGARCH64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java new file mode 100644 index 00000000000..21259a4d32a --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64CurrentFrameGuess.java @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2001, 2006, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.debugger.mips64.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.interpreter.*; +import sun.jvm.hotspot.runtime.*; + +/**

Should be able to be used on all mips64 platforms we support + (currently Linux/mips64) to implement JavaThread's + "currentFrameGuess()" functionality. Input is a MIPS64ThreadContext; + output is SP, FP, and PC for a MIPS64Frame. Instantiation of the + MIPS64Frame is left to the caller, since we may need to subclass + MIPS64Frame to support signal handler frames on Unix platforms.

+ +

Algorithm is to walk up the stack within a given range (say, + 512K at most) looking for a plausible PC and SP for a Java frame, + also considering those coming in from the context. If we find a PC + that belongs to the VM (i.e., in generated code like the + interpreter or CodeCache) then we try to find an associated FP + (frame pointer). We repeat this until we either find a complete frame + or run out of stack to look at.
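+    (In this port, LinuxMIPS64JavaThreadPDAccess passes a scan range of
+    GUESS_SCAN_RANGE (128K) to run(), and the walk advances one machine word,
+    VM.getAddressSize(), per step.)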

*/ + +public class MIPS64CurrentFrameGuess { + private MIPS64ThreadContext context; + private JavaThread thread; + private Address spFound; + private Address fpFound; + private Address pcFound; + + private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") + != null; + + public MIPS64CurrentFrameGuess(MIPS64ThreadContext context, + JavaThread thread) { + this.context = context; + this.thread = thread; + } + + /** Returns false if not able to find a frame within a reasonable range. */ + public boolean run(long regionInBytesToSearch) { + Address sp = context.getRegisterAsAddress(MIPS64ThreadContext.SP); + Address pc = context.getRegisterAsAddress(MIPS64ThreadContext.PC); + Address fp = context.getRegisterAsAddress(MIPS64ThreadContext.FP); + if (sp == null) { + // Bail out if no last java frame eithe + if (thread.getLastJavaSP() != null) { + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + // Bail out + return false; + } + Address end = sp.addOffsetTo(regionInBytesToSearch); + VM vm = VM.getVM(); + + setValues(null, null, null); // Assume we're not going to find anything + + if (vm.isJavaPCDbg(pc)) { + if (vm.isClientCompiler()) { + // If the topmost frame is a Java frame, we are (pretty much) + // guaranteed to have a viable EBP. We should be more robust + // than this (we have the potential for losing entire threads' + // stack traces) but need to see how much work we really have + // to do here. Searching the stack for an (SP, FP) pair is + // hard since it's easy to misinterpret inter-frame stack + // pointers as base-of-frame pointers; we also don't know the + // sizes of C1 frames (not registered in the nmethod) so can't + // derive them from ESP. + + setValues(sp, fp, pc); + return true; + } else { + if (vm.getInterpreter().contains(pc)) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + + sp + ", fp = " + fp + ", pc = " + pc); + } + setValues(sp, fp, pc); + return true; + } + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. In addition, an earlier attempt at a + // non-searching algorithm (see below) failed because the + // stack pointer from the thread context was pointing + // (considerably) beyond the ostensible end of the stack, into + // garbage; walking from the topmost frame back caused a crash. + // + // This algorithm takes the current PC as a given and tries to + // find the correct corresponding SP by walking up the stack + // and repeatedly performing stackwalks (very inefficient). + // + // FIXME: there is something wrong with stackwalking across + // adapter frames...this is likely to be the root cause of the + // failure with the simpler algorithm below. + + for (long offset = 0; + offset < regionInBytesToSearch; + offset += vm.getAddressSize()) { + try { + Address curSP = sp.addOffsetTo(offset); + Frame frame = new MIPS64Frame(curSP, null, pc); + RegisterMap map = thread.newRegisterMap(false); + while (frame != null) { + if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { + // We were able to traverse all the way to the + // bottommost Java frame. + // This sp looks good. Keep it. 
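+                  // (An entry frame with entryFrameIsFirst() true is the bottommost Java frame,
+                  //  so the whole sender chain from (curSP, pc) was walkable; curSP is accepted
+                  //  and FP is deliberately left null in setValues below.)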
+ if (DEBUG) { + System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); + } + setValues(curSP, null, pc); + return true; + } + frame = frame.sender(map); + } + } catch (Exception e) { + if (DEBUG) { + System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); + } + // Bad SP. Try another. + } + } + + // Were not able to find a plausible SP to go with this PC. + // Bail out. + return false; + + /* + // Original algorithm which does not work because SP was + // pointing beyond where it should have: + + // For the server compiler, EBP is not guaranteed to be valid + // for compiled code. We see whether the PC is in the + // interpreter and take care of that, otherwise we run code + // (unfortunately) duplicated from MIPS64Frame.senderForCompiledFrame. + + CodeCache cc = vm.getCodeCache(); + if (cc.contains(pc)) { + CodeBlob cb = cc.findBlob(pc); + + // See if we can derive a frame pointer from SP and PC + // NOTE: This is the code duplicated from MIPS64Frame + Address saved_fp = null; + int llink_offset = cb.getLinkOffset(); + if (llink_offset >= 0) { + // Restore base-pointer, since next frame might be an interpreter frame. + Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); + saved_fp = fp_addr.getAddressAt(0); + } + + setValues(sp, saved_fp, pc); + return true; + } + */ + } + } else { + // If the current program counter was not known to us as a Java + // PC, we currently assume that we are in the run-time system + // and attempt to look to thread-local storage for saved ESP and + // EBP. Note that if these are null (because we were, in fact, + // in Java code, i.e., vtable stubs or similar, and the SA + // didn't have enough insight into the target VM to understand + // that) then we are going to lose the entire stack trace for + // the thread, which is sub-optimal. FIXME. + + if (DEBUG) { + System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + + thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); + } + if (thread.getLastJavaSP() == null) { + return false; // No known Java frames on stack + } + setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); + return true; + } + } + + public Address getSP() { return spFound; } + public Address getFP() { return fpFound; } + /** May be null if getting values from thread-local storage; take + care to call the correct MIPS64Frame constructor to recover this if + necessary */ + public Address getPC() { return pcFound; } + + private void setValues(Address sp, Address fp, Address pc) { + spFound = sp; + fpFound = fp; + pcFound = pc; + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java new file mode 100644 index 00000000000..0cc5cf4e7ca --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64Frame.java @@ -0,0 +1,547 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import java.util.*; +import sun.jvm.hotspot.code.*; +import sun.jvm.hotspot.compiler.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.oops.*; +import sun.jvm.hotspot.runtime.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.utilities.*; + +/** Specialization of and implementation of abstract methods of the + Frame class for the mips64 family of CPUs. */ + +public class MIPS64Frame extends Frame { + private static final boolean DEBUG; + static { + DEBUG = System.getProperty("sun.jvm.hotspot.runtime.mips64.MIPS64Frame.DEBUG") != null; + } + + // All frames + private static final int LINK_OFFSET = 0; + private static final int RETURN_ADDR_OFFSET = 1; + private static final int SENDER_SP_OFFSET = 2; + + // Interpreter frames + private static final int INTERPRETER_FRAME_MIRROR_OFFSET = 2; // for native calls only + private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; + private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; + private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; + private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only + private static int INTERPRETER_FRAME_CACHE_OFFSET; + private static int INTERPRETER_FRAME_LOCALS_OFFSET; + private static int INTERPRETER_FRAME_BCX_OFFSET; + private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; + private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; + + // Entry frames + private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET; + + // Native frames + private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; + + private static VMReg rbp; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; + INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; + INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; + INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; + INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; + INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; + + ENTRY_FRAME_CALL_WRAPPER_OFFSET = db.lookupIntConstant("frame::entry_frame_call_wrapper_offset"); + if (VM.getVM().getAddressSize() == 4) { + rbp = new VMReg(5); + } else { + rbp = new VMReg(5 << 1); + } + } + + + // an additional field beyond sp and pc: + Address raw_fp; // 
frame pointer + private Address raw_unextendedSP; + + private MIPS64Frame() { + } + + private void adjustForDeopt() { + if ( pc != null) { + // Look for a deopt pc and if it is deopted convert to original pc + CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); + if (cb != null && cb.isJavaMethod()) { + NMethod nm = (NMethod) cb; + if (pc.equals(nm.deoptHandlerBegin())) { + if (Assert.ASSERTS_ENABLED) { + Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); + } + // adjust pc if frame is deoptimized. + pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); + deoptimized = true; + } + } + } + } + + public MIPS64Frame(Address raw_sp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("MIPS64Frame(sp, fp, pc): " + this); + dumpStack(); + } + } + + public MIPS64Frame(Address raw_sp, Address raw_fp) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_sp; + this.raw_fp = raw_fp; + this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("MIPS64Frame(sp, fp): " + this); + dumpStack(); + } + } + + public MIPS64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { + this.raw_sp = raw_sp; + this.raw_unextendedSP = raw_unextendedSp; + this.raw_fp = raw_fp; + this.pc = pc; + adjustUnextendedSP(); + + // Frame must be fully constructed before this call + adjustForDeopt(); + + if (DEBUG) { + System.out.println("MIPS64Frame(sp, unextendedSP, fp, pc): " + this); + dumpStack(); + } + + } + + public Object clone() { + MIPS64Frame frame = new MIPS64Frame(); + frame.raw_sp = raw_sp; + frame.raw_unextendedSP = raw_unextendedSP; + frame.raw_fp = raw_fp; + frame.pc = pc; + frame.deoptimized = deoptimized; + return frame; + } + + public boolean equals(Object arg) { + if (arg == null) { + return false; + } + + if (!(arg instanceof MIPS64Frame)) { + return false; + } + + MIPS64Frame other = (MIPS64Frame) arg; + + return (AddressOps.equal(getSP(), other.getSP()) && + AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && + AddressOps.equal(getFP(), other.getFP()) && + AddressOps.equal(getPC(), other.getPC())); + } + + public int hashCode() { + if (raw_sp == null) { + return 0; + } + + return raw_sp.hashCode(); + } + + public String toString() { + return "sp: " + (getSP() == null? "null" : getSP().toString()) + + ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + + ", fp: " + (getFP() == null? "null" : getFP().toString()) + + ", pc: " + (pc == null? 
"null" : pc.toString()); + } + + // accessors for the instance variables + public Address getFP() { return raw_fp; } + public Address getSP() { return raw_sp; } + public Address getID() { return raw_sp; } + + // FIXME: not implemented yet (should be done for Solaris/MIPS64) + public boolean isSignalHandlerFrameDbg() { return false; } + public int getSignalNumberDbg() { return 0; } + public String getSignalNameDbg() { return null; } + + public boolean isInterpretedFrameValid() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "Not an interpreted frame"); + } + + // These are reasonable sanity checks + if (getFP() == null || getFP().andWithMask(0x3) != null) { + return false; + } + + if (getSP() == null || getSP().andWithMask(0x3) != null) { + return false; + } + + if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { + return false; + } + + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (getFP().lessThanOrEqual(getSP())) { + // this attempts to deal with unsigned comparison above + return false; + } + + if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { + // stack frames shouldn't be large. + return false; + } + + return true; + } + + // FIXME: not applicable in current system + // void patch_pc(Thread* thread, address pc); + + public Frame sender(RegisterMap regMap, CodeBlob cb) { + MIPS64RegisterMap map = (MIPS64RegisterMap) regMap; + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map.setIncludeArgumentOops(false); + + if (isEntryFrame()) return senderForEntryFrame(map); + if (isInterpretedFrame()) return senderForInterpreterFrame(map); + + if(cb == null) { + cb = VM.getVM().getCodeCache().findBlob(getPC()); + } else { + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); + } + } + + if (cb != null) { + return senderForCompiledFrame(map, cb); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. 
+ return new MIPS64Frame(getSenderSP(), getLink(), getSenderPC()); + } + + private Frame senderForEntryFrame(MIPS64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForEntryFrame"); + } + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + MIPS64JavaCallWrapper jcw = (MIPS64JavaCallWrapper) getEntryFrameCallWrapper(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); + Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); + } + MIPS64Frame fr; + if (jcw.getLastJavaPC() != null) { + fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); + } else { + fr = new MIPS64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); + } + map.clear(); + if (Assert.ASSERTS_ENABLED) { + Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); + } + return fr; + } + + //------------------------------------------------------------------------------ + // frame::adjust_unextended_sp + private void adjustUnextendedSP() { + // On mips64, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + CodeBlob cb = cb(); + NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); + if (senderNm != null) { + // If the sender PC is a deoptimization point, get the original PC. + if (senderNm.isDeoptEntry(getPC()) || + senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp)); + } + } + } + + private Frame senderForInterpreterFrame(MIPS64RegisterMap map) { + if (DEBUG) { + System.out.println("senderForInterpreterFrame"); + } + Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + Address sp = addressOfStackSlot(SENDER_SP_OFFSET); + // We do not need to update the callee-save register mapping because above + // us is either another interpreter frame or a converter-frame, but never + // directly a compiled frame. + // 11/24/04 SFG. With the removal of adapter frames this is no longer true. + // However c2 no longer uses callee save register for java calls so there + // are no callee register to find. + + if (map.getUpdateMap()) + updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); + + return new MIPS64Frame(sp, unextendedSP, getLink(), getSenderPC()); + } + + private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { + map.setLocation(rbp, savedFPAddr); + } + + private Frame senderForCompiledFrame(MIPS64RegisterMap map, CodeBlob cb) { + if (DEBUG) { + System.out.println("senderForCompiledFrame"); + } + + // + // NOTE: some of this code is (unfortunately) duplicated in MIPS64CurrentFrameGuess + // + + if (Assert.ASSERTS_ENABLED) { + Assert.that(map != null, "map must be set"); + } + + // frame owned by optimizing compiler + if (Assert.ASSERTS_ENABLED) { + Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); + } + Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); + + // On Intel the return_address is always the word on the stack + Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); + + // This is the saved value of EBP which may or may not really be an FP. + // It is only an FP if the sender is an interpreter frame (or C1?). 
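+    // Recap of the reconstruction performed here (offsets in machine words):
+    //   senderSP    = unextendedSP + cb.getFrameSize()
+    //   senderPC    = *(senderSP - 1 word)                (return-address slot)
+    //   savedFPAddr = senderSP - SENDER_SP_OFFSET words   (= senderSP - 2 words)
+    // The word at savedFPAddr is only a real frame pointer when the sender is
+    // an interpreter (or possibly C1) frame, as noted above.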
+ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); + + if (map.getUpdateMap()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map.setIncludeArgumentOops(cb.callerMustGCArguments()); + + if (cb.getOopMaps() != null) { + OopMapSet.updateRegisterMap(this, cb, map, true); + } + + // Since the prolog does the save and restore of EBP there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + updateMapWithSavedLink(map, savedFPAddr); + } + + return new MIPS64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); + } + + protected boolean hasSenderPD() { + // FIXME + // Check for null ebp? Need to do some tests. + return true; + } + + public long frameSize() { + return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); + } + + public Address getLink() { + return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); + } + + // FIXME: not implementable yet + //inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + public Address getUnextendedSP() { return raw_unextendedSP; } + + // Return address: + public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } + public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } + + // return address of param, zero origin index. + public Address getNativeParamAddr(int idx) { + return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); + } + + public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } + + public Address addressOfInterpreterFrameLocals() { + return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); + } + + private Address addressOfInterpreterFrameBCX() { + return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); + } + + public int getInterpreterFrameBCI() { + // FIXME: this is not atomic with respect to GC and is unsuitable + // for use in a non-debugging, or reflective, system. Need to + // figure out how to express this. 
+ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); + Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); + Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); + return bcpToBci(bcp, method); + } + + public Address addressOfInterpreterFrameMDX() { + return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); + } + + // FIXME + //inline int frame::interpreter_frame_monitor_size() { + // return BasicObjectLock::size(); + //} + + // expression stack + // (the max_stack arguments are used by the GC; see class FrameClosure) + + public Address addressOfInterpreterFrameExpressionStack() { + Address monitorEnd = interpreterFrameMonitorEnd().address(); + return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); + } + + public int getInterpreterFrameExpressionStackDirection() { return -1; } + + // top of expression stack + public Address addressOfInterpreterFrameTOS() { + return getSP(); + } + + /** Expression stack from top down */ + public Address addressOfInterpreterFrameTOSAt(int slot) { + return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); + } + + public Address getInterpreterFrameSenderSP() { + if (Assert.ASSERTS_ENABLED) { + Assert.that(isInterpretedFrame(), "interpreted frame expected"); + } + return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); + } + + // Monitors + public BasicObjectLock interpreterFrameMonitorBegin() { + return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); + } + + public BasicObjectLock interpreterFrameMonitorEnd() { + Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); + if (Assert.ASSERTS_ENABLED) { + // make sure the pointer points inside the frame + Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); + Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); + } + return new BasicObjectLock(result); + } + + public int interpreterFrameMonitorSize() { + return BasicObjectLock.size(); + } + + // Method + public Address addressOfInterpreterFrameMethod() { + return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); + } + + // Constant pool cache + public Address addressOfInterpreterFrameCPCache() { + return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); + } + + // Entry frames + public JavaCallWrapper getEntryFrameCallWrapper() { + return new MIPS64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); + } + + protected Address addressOfSavedOopResult() { + // offset is 2 for compiler2 and 3 for compiler1 + return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * + VM.getVM().getAddressSize()); + } + + protected Address addressOfSavedReceiver() { + return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); + } + + private void dumpStack() { + if (getFP() != null) { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getFP().addOffsetTo(5 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } else { + for (Address addr = getSP().addOffsetTo(-5 * VM.getVM().getAddressSize()); + AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); + addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java new file mode 100644 index 00000000000..81fcb5b5689 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import java.util.*; +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.types.*; +import sun.jvm.hotspot.runtime.*; + +public class MIPS64JavaCallWrapper extends JavaCallWrapper { + private static AddressField lastJavaFPField; + + static { + VM.registerVMInitializedObserver(new Observer() { + public void update(Observable o, Object data) { + initialize(VM.getVM().getTypeDataBase()); + } + }); + } + + private static synchronized void initialize(TypeDataBase db) { + Type type = db.lookupType("JavaFrameAnchor"); + + lastJavaFPField = type.getAddressField("_last_Java_fp"); + } + + public MIPS64JavaCallWrapper(Address addr) { + super(addr); + } + + public Address getLastJavaFP() { + return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); + } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java new file mode 100644 index 00000000000..648503792d9 --- /dev/null +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime.mips64; + +import sun.jvm.hotspot.debugger.*; +import sun.jvm.hotspot.runtime.*; + +public class MIPS64RegisterMap extends RegisterMap { + + /** This is the only public constructor */ + public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { + super(thread, updateMap); + } + + protected MIPS64RegisterMap(RegisterMap map) { + super(map); + } + + public Object clone() { + MIPS64RegisterMap retval = new MIPS64RegisterMap(this); + return retval; + } + + // no PD state to clear or copy: + protected void clearPD() {} + protected void initializePD() {} + protected void initializeFromPD(RegisterMap map) {} + protected Address getLocationPD(VMReg reg) { return null; } +} diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java index aa692578665..9c97d09bc34 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java @@ -22,6 +22,13 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. 
These + * modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + * + */ + package sun.jvm.hotspot.utilities; /** Provides canonicalized OS and CPU information for the rest of the @@ -65,6 +72,10 @@ public static String getCPU() throws UnsupportedPlatformException { return cpu; } else if (cpu.equals("aarch64")) { return cpu; + } else if (cpu.equals("mips64") || cpu.equals("mips64el")) { + return "mips64"; + } else if (cpu.equals("loongarch64")) { + return "loongarch64"; } else { try { Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed"); diff --git a/hotspot/make/defs.make b/hotspot/make/defs.make index a3573da56f3..6e93182c928 100644 --- a/hotspot/make/defs.make +++ b/hotspot/make/defs.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # The common definitions for hotspot builds. # Optionally include SPEC file generated by configure. @@ -285,7 +291,7 @@ ifneq ($(OSNAME),windows) # Use uname output for SRCARCH, but deal with platform differences. If ARCH # is not explicitly listed below, it is treated as x86. - SRCARCH ?= $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 ppc ppc64 ppc64le zero aarch64,$(ARCH))) + SRCARCH ?= $(ARCH/$(filter sparc sparc64 ia64 amd64 x86_64 ppc ppc64 ppc64le zero aarch64 mips64 loongarch64,$(ARCH))) ARCH/ = x86 ARCH/sparc = sparc ARCH/sparc64= sparc @@ -295,6 +301,10 @@ ifneq ($(OSNAME),windows) ARCH/ppc64 = ppc ARCH/ppc64le= ppc ARCH/ppc = ppc + ARCH/mips64 = mips + ARCH/mips64el = mips + ARCH/loongarch64 = loongarch + ARCH/loongarch = loongarch ARCH/zero = zero ARCH/aarch64 = aarch64 @@ -317,6 +327,20 @@ ifneq ($(OSNAME),windows) BUILDARCH = ppc64 endif endif + ifeq ($(BUILDARCH), mips) + ifdef LP64 +# ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) +# BUILDARCH = mips64el +# else + BUILDARCH = mips64 +# endif + endif + endif + ifeq ($(BUILDARCH), loongarch) + ifdef LP64 + BUILDARCH = loongarch64 + endif + endif # LIBARCH is 1:1 mapping from BUILDARCH, except for ARCH=ppc64le ifeq ($(ARCH),ppc64le) @@ -332,9 +356,18 @@ ifneq ($(OSNAME),windows) LIBARCH/sparcv9 = sparcv9 LIBARCH/ia64 = ia64 LIBARCH/ppc64 = ppc64 + LIBARCH/loongarch = loongarch64 LIBARCH/zero = $(ZERO_LIBARCH) - LP64_ARCH += sparcv9 amd64 ia64 ppc64 aarch64 zero + ifeq ($(LIBARCH), mips64) + ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) + LIBARCH = mips64el + else + LIBARCH = mips64 + endif + endif + + LP64_ARCH += sparcv9 amd64 ia64 ppc64 aarch64 mips64 mips64el loongarch64 zero endif # Required make macro settings for all platforms diff --git a/hotspot/make/linux/Makefile b/hotspot/make/linux/Makefile index e8f2010412f..5aff01e87d9 100644 --- a/hotspot/make/linux/Makefile +++ b/hotspot/make/linux/Makefile @@ -74,6 +74,10 @@ ifneq (,$(findstring $(ARCH), ppc ppc64)) FORCE_TIERED=0 endif endif +# C1 is not ported on mips64, so we cannot build a tiered VM: +ifeq (mips64, $(findstring mips64, $(ARCH))) + FORCE_TIERED=0 +endif ifdef LP64 ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","") diff --git a/hotspot/make/linux/makefiles/defs.make b/hotspot/make/linux/makefiles/defs.make index ec414639d20..9ade73ab340 100644 --- a/hotspot/make/linux/makefiles/defs.make +++ b/hotspot/make/linux/makefiles/defs.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. 
These +# modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # The common definitions for hotspot linux builds. # Include the top level defs.make under make directory instead of this one. # This file is included into make/defs.make. @@ -39,6 +45,18 @@ ifndef ARCH ARCH := ppc64 endif endif +ifeq ($(ARCH), mips64el) + ARCH=mips64 +endif +ifeq ($(LP64), 1) + ifeq ($(ARCH), mips) + ARCH=mips64 + endif +endif + +ifeq ($(ARCH), loongarch) + ARCH=loongarch64 +endif PATH_SEP ?= : @@ -83,6 +101,36 @@ ifneq (,$(findstring $(ARCH), sparc)) HS_ARCH = sparc endif +# mips +ifeq ($(ARCH), mips64) + ifeq ($(ARCH_DATA_MODEL), 64) + ARCH_DATA_MODEL = 64 + MAKE_ARGS += LP64=1 + PLATFORM = linux-mips64 + VM_PLATFORM = linux_mips64 + else + ARCH_DATA_MODEL = 32 + PLATFORM = linux-mips32 + VM_PLATFORM = linux_mips32 + endif + HS_ARCH = mips +endif + +# loongarch +ifeq ($(ARCH), loongarch64) + ifeq ($(ARCH_DATA_MODEL), 64) + ARCH_DATA_MODEL = 64 + MAKE_ARGS += LP64=1 + PLATFORM = linux-loongarch64 + VM_PLATFORM = linux_loongarch64 + else + ARCH_DATA_MODEL = 32 + PLATFORM = linux-loongarch32 + VM_PLATFORM = linux_loongarch32 + endif + HS_ARCH = loongarch +endif + # i686/i586 and amd64/x86_64 ifneq (,$(findstring $(ARCH), amd64 x86_64 i686 i586)) ifeq ($(ARCH_DATA_MODEL), 64) @@ -311,16 +359,24 @@ ADD_SA_BINARIES/sparc = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ $(EXPORT_LIB_DIR)/sa-jdi.jar ADD_SA_BINARIES/aarch64 = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ $(EXPORT_LIB_DIR)/sa-jdi.jar +ADD_SA_BINARIES/mips = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ + $(EXPORT_LIB_DIR)/sa-jdi.jar +ADD_SA_BINARIES/loongarch = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \ + $(EXPORT_LIB_DIR)/sa-jdi.jar ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1) ifneq ($(STRIP_POLICY),no_strip) ifeq ($(ZIP_DEBUGINFO_FILES),1) ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz ADD_SA_BINARIES/aarch64 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/mips += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz + ADD_SA_BINARIES/loongarch += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz else ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo ADD_SA_BINARIES/sparc += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo ADD_SA_BINARIES/aarch64 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/mips += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo + ADD_SA_BINARIES/loongarch += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo endif endif endif diff --git a/hotspot/make/linux/makefiles/gcc.make b/hotspot/make/linux/makefiles/gcc.make index 7dde7f0963e..94c6d1d0154 100644 --- a/hotspot/make/linux/makefiles/gcc.make +++ b/hotspot/make/linux/makefiles/gcc.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + #------------------------------------------------------------------------ # CC, CXX & AS @@ -177,6 +183,9 @@ ARCHFLAG/aarch64 = ARCHFLAG/ia64 = ARCHFLAG/sparc = -m32 -mcpu=v9 ARCHFLAG/sparcv9 = -m64 -mcpu=v9 +ARCHFLAG/mips64 = -mabi=64 +#ARCHFLAG/loongarch64 = -lp64 +ARCHFLAG/loongarch64 = ARCHFLAG/zero = $(ZERO_ARCHFLAG) ARCHFLAG/ppc64 = -m64 @@ -202,7 +211,7 @@ else endif # Compiler warnings are treated as errors -WARNINGS_ARE_ERRORS = -Werror +#WARNINGS_ARE_ERRORS = -Werror ifeq ($(USE_CLANG), true) # However we need to clean the code up before we can unrestrictedly enable this option with Clang diff --git a/hotspot/make/linux/makefiles/loongarch64.make b/hotspot/make/linux/makefiles/loongarch64.make new file mode 100644 index 00000000000..9e3cdb6f23a --- /dev/null +++ b/hotspot/make/linux/makefiles/loongarch64.make @@ -0,0 +1,43 @@ +# +# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# + +# Not included in includeDB because it has no dependencies +Obj_Files += linux_loongarch.o + +# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized +OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) +# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized +OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) +# Must also specify if CPU is little endian +CFLAGS += -DVM_LITTLE_ENDIAN + +CFLAGS += -DSICORTEX_ERRATA + +CFLAGS += -D_LP64=1 + +# The serviceability agent relies on frame pointer (%rbp) to walk thread stack +CFLAGS += -fno-omit-frame-pointer + +OPT_CFLAGS/compactingPermGenGen.o = -O1 diff --git a/hotspot/make/linux/makefiles/mips64.make b/hotspot/make/linux/makefiles/mips64.make new file mode 100644 index 00000000000..d9af3b13ab2 --- /dev/null +++ b/hotspot/make/linux/makefiles/mips64.make @@ -0,0 +1,43 @@ +# +# Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# + +# Not included in includeDB because it has no dependencies +Obj_Files += linux_mips.o + +# The copied fdlibm routines in sharedRuntimeTrig.o must not be optimized +OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT) +# The copied fdlibm routines in sharedRuntimeTrans.o must not be optimized +OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT) +# Must also specify if CPU is little endian +CFLAGS += -DVM_LITTLE_ENDIAN + +CFLAGS += -DSICORTEX_ERRATA + +CFLAGS += -D_LP64=1 + +# The serviceability agent relies on frame pointer (%rbp) to walk thread stack +CFLAGS += -fno-omit-frame-pointer + +OPT_CFLAGS/compactingPermGenGen.o = -O1 diff --git a/hotspot/make/linux/makefiles/sa.make b/hotspot/make/linux/makefiles/sa.make index cdcb16a1a3f..34c71bd666c 100644 --- a/hotspot/make/linux/makefiles/sa.make +++ b/hotspot/make/linux/makefiles/sa.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # This makefile (sa.make) is included from the sa.make in the # build directories. @@ -109,6 +115,8 @@ $(GENERATED)/sa-jdi.jar:: $(AGENT_FILES) $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.mips64.MIPS64ThreadContext + $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.loongarch64.LOONGARCH64ThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.debugger.sparc.SPARCThreadContext $(QUIETLY) $(REMOTE) $(RUN.JAVAH) -classpath $(SA_CLASSDIR) -d $(GENERATED) -jni sun.jvm.hotspot.asm.Disassembler diff --git a/hotspot/make/linux/makefiles/saproc.make b/hotspot/make/linux/makefiles/saproc.make index ffc0ec5ce5b..c04a6765df7 100644 --- a/hotspot/make/linux/makefiles/saproc.make +++ b/hotspot/make/linux/makefiles/saproc.make @@ -21,6 +21,13 @@ # questions. # # + +# +# This file has been modified by Loongson Technology in 2019. These +# modifications are Copyright (c) 2018, 2019, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + include $(GAMMADIR)/make/defs.make include $(GAMMADIR)/make/altsrc.make @@ -81,7 +88,12 @@ endif SA_LFLAGS = $(MAPFLAG:FILENAME=$(SAMAPFILE)) $(LDFLAGS_HASH_STYLE) \ $(LDFLAGS_NO_EXEC_STACK) $(EXTRA_LDFLAGS) +ifneq (mips64, $(findstring mips64, $(BUILDARCH))) SAARCH ?= $(BUILDARCH) +else +#If -Dmips64 is used, mips64 would be conflict with "struct mips64_watch_regs mips64" in /usr/include/asm/ptrace.h. +SAARCH ?= mips +endif $(LIBSAPROC): $(SASRCFILES) $(SAMAPFILE) $(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \ diff --git a/hotspot/make/linux/makefiles/sparcWorks.make b/hotspot/make/linux/makefiles/sparcWorks.make index e39116023c5..dbc2ace8257 100644 --- a/hotspot/make/linux/makefiles/sparcWorks.make +++ b/hotspot/make/linux/makefiles/sparcWorks.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2015. These +# modifications are Copyright (c) 2015 Loongson Technology, and are made +# available on the same license terms set forth above. +# + #------------------------------------------------------------------------ # CC, CXX & AS @@ -38,6 +44,7 @@ endif ARCHFLAG = $(ARCHFLAG/$(BUILDARCH)) ARCHFLAG/i486 = -m32 ARCHFLAG/amd64 = -m64 +ARCHFLAG/mips64 = -m64 CFLAGS += $(ARCHFLAG) AOUT_FLAGS += $(ARCHFLAG) diff --git a/hotspot/make/linux/makefiles/vm.make b/hotspot/make/linux/makefiles/vm.make index 04b7c202873..5e428538a0f 100644 --- a/hotspot/make/linux/makefiles/vm.make +++ b/hotspot/make/linux/makefiles/vm.make @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2018, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # Rules to build JVM and related libraries, included from vm.make in the build # directory. @@ -99,9 +105,22 @@ CXXFLAGS = \ ${HS_LIB_ARCH} \ ${VM_DISTRO} +ifeq ($(MIPS_ABI),n32) + CXXFLAGS += -DN32 +else + ifeq ($(MIPS_ABI),n64) + CXXFLAGS += -DN64 + endif +endif # This is VERY important! The version define must only be supplied to vm_version.o # If not, ccache will not re-use the cache at all, since the version string might contain # a time and date. 
+ifdef LOONGSON_RUNTIME_NAME + LOONGSON_VM_INFO = -DLOONGSON_RUNTIME_NAME="\"$(LOONGSON_RUNTIME_NAME)\"" +else + LOONGSON_VM_INFO = -DLOONGSON_RUNTIME_NAME="\"\"" +endif +CXXFLAGS/vmError.o += ${LOONGSON_VM_INFO} CXXFLAGS/vm_version.o += ${JRE_VERSION} ${VERSION_CFLAGS} CXXFLAGS/arguments.o += ${VERSION_CFLAGS} @@ -211,6 +230,15 @@ endif ifeq ($(Platform_arch_model), x86_64) Src_Files_EXCLUDE += \*x86_32\* endif +ifeq ($(Platform_arch_model), mips_32) +Src_Files_EXCLUDE += \*mips_64\* +endif +ifeq ($(Platform_arch_model), mips_64) +Src_Files_EXCLUDE += \*mips_32\* +endif +ifeq ($(Platform_arch_model), loongarch_64) +Src_Files_EXCLUDE += \*loongarch_32\* +endif # Alternate vm.make # This has to be included here to allow changes to the source diff --git a/hotspot/make/linux/platform_loongarch64 b/hotspot/make/linux/platform_loongarch64 new file mode 100644 index 00000000000..d704cf389ae --- /dev/null +++ b/hotspot/make/linux/platform_loongarch64 @@ -0,0 +1,17 @@ +os_family = linux + +arch = loongarch + +arch_model = loongarch_64 + +os_arch = linux_loongarch + +os_arch_model = linux_loongarch_64 + +lib_arch = loongarch64 + +compiler = gcc + +gnu_dis_arch = loongarch64 + +sysdefs = -DLINUX -D_GNU_SOURCE -DLOONGARCH64 diff --git a/hotspot/make/linux/platform_mips64 b/hotspot/make/linux/platform_mips64 new file mode 100644 index 00000000000..c283671f828 --- /dev/null +++ b/hotspot/make/linux/platform_mips64 @@ -0,0 +1,17 @@ +os_family = linux + +arch = mips + +arch_model = mips_64 + +os_arch = linux_mips + +os_arch_model = linux_mips_64 + +lib_arch = mips64 + +compiler = gcc + +gnu_dis_arch = mips64 + +sysdefs = -DLINUX -D_GNU_SOURCE -DMIPS64 diff --git a/hotspot/make/sa.files b/hotspot/make/sa.files index d6e728a9a8c..43b08e3ad19 100644 --- a/hotspot/make/sa.files +++ b/hotspot/make/sa.files @@ -22,6 +22,12 @@ # # +# +# This file has been modified by Loongson Technology in 2020. These +# modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made +# available on the same license terms set forth above. +# + # This filelist macro is included in platform specific sa.make # included all packages/*.java. package list can be generated by # $(GAMMADIR)/agent/make/build-pkglist. 
@@ -52,14 +58,20 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/cdbg/basic/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/dummy/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/amd64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/mips64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/loongarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/aarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/sparc/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/mips64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/loongarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/elf/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/aarch64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/mips64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/loongarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/sparc/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/*.java \ @@ -94,8 +106,12 @@ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_aarch64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_mips64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_loongarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_x86/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_sparc/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/mips64/*.java \ +$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/loongarch64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/posix/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/solaris_amd64/*.java \ $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/solaris_sparc/*.java \ diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp index 35d34a08eaa..3b8cf4a11d9 100644 --- a/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp @@ -1177,7 +1177,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { } } - +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { + ShouldNotReachHere(); +} void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { LIR_Opr src = op->in_opr(); @@ -1242,7 +1244,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { } case Bytecodes::_d2l: { - Register tmp = op->tmp1()->as_register(); + Register tmp = op->tmp()->as_register(); __ clear_fpsr(); __ fcvtzd(dest->as_register_lo(), src->as_double_reg()); __ get_fpsr(tmp); @@ -1253,7 +1255,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { } case Bytecodes::_f2i: { - Register tmp = op->tmp1()->as_register(); + Register tmp = op->tmp()->as_register(); __ clear_fpsr(); __ fcvtzsw(dest->as_register(), src->as_float_reg()); __ get_fpsr(tmp); @@ -1264,7 +1266,7 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { } case Bytecodes::_f2l: { - Register tmp = op->tmp1()->as_register(); + Register tmp = op->tmp()->as_register(); __ clear_fpsr(); __ fcvtzs(dest->as_register_lo(), src->as_float_reg()); __ get_fpsr(tmp); @@ -1275,7 +1277,7 @@ void 
LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { } case Bytecodes::_d2i: { - Register tmp = op->tmp1()->as_register(); + Register tmp = op->tmp()->as_register(); __ clear_fpsr(); __ fcvtzdw(dest->as_register(), src->as_double_reg()); __ get_fpsr(tmp); @@ -1731,6 +1733,11 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); } +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, + LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + ShouldNotReachHere(); +} + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp index 120dd1a7dfa..6a3289022dd 100644 --- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp @@ -277,18 +277,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ store(reg, addr); } -void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { +template +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { LIR_Opr reg = new_register(T_INT); __ load(generate_address(base, disp, T_INT), reg, info); - __ cmp(condition, reg, LIR_OprFact::intConst(c)); + __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); } -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +template +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { LIR_Opr reg1 = new_register(T_INT); __ load(generate_address(base, disp, type), reg1, info); - __ cmp(condition, reg, reg1); + __ cmp_branch(condition, reg, reg1, type, tgt); } +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp new file mode 100644 index 00000000000..2996ef7aa70 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.cpp @@ -0,0 +1,855 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Implementation of AddressLiteral + +AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { + _is_lval = false; + _target = target; + _rspec = rspec_from_rtype(rtype, target); +} + +// Implementation of Address + +Address Address::make_array(ArrayAddress adr) { + AddressLiteral base = adr.base(); + Address index = adr.index(); + assert(index._disp == 0, "must not have disp"); // maybe it can? 
+ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); + array._rspec = base._rspec; + return array; +} + +// exceedingly dangerous constructor +Address::Address(address loc, RelocationHolder spec) { + _base = noreg; + _index = noreg; + _scale = no_scale; + _disp = (intptr_t) loc; + _rspec = spec; +} + + +int Assembler::is_int_mask(int x) { + int xx = x; + int count = 0; + + while (x != 0) { + x &= (x - 1); + count++; + } + + if ((1<> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_b(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_b(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_b(dst, base, AT); + } + } +} + +void Assembler::ld_bu(Register rd, Address src) { + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_bu(dst, base, index); + } else { + add_d(AT, base, index); + ld_bu(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_bu(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_bu(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_bu(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_bu(dst, base, AT); + } + } +} + +void Assembler::ld_d(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_d(dst, base, index); + } else { + add_d(AT, base, index); + ld_d(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_d(dst, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + ldptr_d(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_d(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_d(dst, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + ldptr_d(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_d(dst, base, AT); + } + } +} + +void Assembler::ld_h(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_h(dst, base, index); + } else { + add_d(AT, base, index); + ld_h(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_h(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); 
+ } + ldx_h(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_h(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_h(dst, base, AT); + } + } +} + +void Assembler::ld_hu(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_hu(dst, base, index); + } else { + add_d(AT, base, index); + ld_hu(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_hu(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_hu(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_hu(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_hu(dst, base, AT); + } + } +} + +void Assembler::ll_w(Register rd, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ll_w(rd, src.base(), src.disp()); +} + +void Assembler::ll_d(Register rd, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ll_d(rd, src.base(), src.disp()); +} + +void Assembler::ld_w(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_w(dst, base, index); + } else { + add_d(AT, base, index); + ld_w(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_w(dst, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + ldptr_w(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_w(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_w(dst, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + ldptr_w(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_w(dst, base, AT); + } + } +} + +void Assembler::ld_wu(Register rd, Address src){ + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + ldx_wu(dst, base, index); + } else { + add_d(AT, base, index); + ld_wu(dst, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + ld_wu(dst, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + ldx_wu(dst, base, AT); + } + } else { + if (is_simm(disp, 12)) { + ld_wu(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + ldx_wu(dst, base, AT); + } + } +} + +void Assembler::st_b(Register rd, Address dst) { + Register src = rd; + Register 
base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_b(src, base, index); + } else { + add_d(AT, base, index); + st_b(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_b(src, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_b(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_b(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_b(src, base, AT); + } + } +} + +void Assembler::sc_w(Register rd, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sc_w(rd, dst.base(), dst.disp()); +} + +void Assembler::sc_d(Register rd, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sc_d(rd, dst.base(), dst.disp()); +} + +void Assembler::st_d(Register rd, Address dst) { + Register src = rd; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_d(src, base, index); + } else { + add_d(AT, base, index); + st_d(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_d(src, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + stptr_d(src, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_d(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_d(src, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + stptr_d(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_d(src, base, AT); + } + } +} + +void Assembler::st_h(Register rd, Address dst) { + Register src = rd; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_h(src, base, index); + } else { + add_d(AT, base, index); + st_h(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_h(src, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_h(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_h(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_h(src, base, AT); + } + } +} + +void Assembler::st_w(Register rd, Address dst) { + Register src = rd; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = 
dst.disp(); + + if (index != noreg) { + assert_different_registers(src, AT); + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + stx_w(src, base, index); + } else { + add_d(AT, base, index); + st_w(src, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + st_w(src, AT, disp); + } + } else if (is_simm(disp, 16) && !(disp & 3)) { + if (scale == 0) { + add_d(AT, base, index); + } else { + alsl_d(AT, index, base, scale - 1); + } + stptr_w(src, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + stx_w(src, base, AT); + } + } else { + if (is_simm(disp, 12)) { + st_w(src, base, disp); + } else if (is_simm(disp, 16) && !(disp & 3)) { + stptr_w(src, base, disp); + } else { + assert_different_registers(src, AT); + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + stx_w(src, base, AT); + } + } +} + +void Assembler::fld_s(FloatRegister fd, Address src) { + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fldx_s(fd, base, index); + } else { + add_d(AT, base, index); + fld_s(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fld_s(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fldx_s(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fld_s(fd, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fldx_s(fd, base, AT); + } + } +} + +void Assembler::fld_d(FloatRegister fd, Address src) { + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fldx_d(fd, base, index); + } else { + add_d(AT, base, index); + fld_d(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fld_d(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fldx_d(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fld_d(fd, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fldx_d(fd, base, AT); + } + } +} + +void Assembler::fst_s(FloatRegister fd, Address dst) { + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fstx_s(fd, base, index); + } else { + add_d(AT, base, index); + fst_s(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fst_s(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fstx_s(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fst_s(fd, base, disp); + } else { + lu12i_w(AT, 
split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fstx_s(fd, base, AT); + } + } +} + +void Assembler::fst_d(FloatRegister fd, Address dst) { + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (is_simm(disp, 12)) { + if (scale == 0) { + if (disp == 0) { + fstx_d(fd, base, index); + } else { + add_d(AT, base, index); + fst_d(fd, AT, disp); + } + } else { + alsl_d(AT, index, base, scale - 1); + fst_d(fd, AT, disp); + } + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + + if (scale == 0) { + add_d(AT, AT, index); + } else { + alsl_d(AT, index, AT, scale - 1); + } + fstx_d(fd, base, AT); + } + } else { + if (is_simm(disp, 12)) { + fst_d(fd, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + fstx_d(fd, base, AT); + } + } +} diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp new file mode 100644 index 00000000000..46b57cfe761 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.hpp @@ -0,0 +1,2810 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP + +#include "asm/register.hpp" + +class BiasedLockingCounters; + + +// Note: A register location is represented via a Register, not +// via an address for efficiency & simplicity reasons. 
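
Reviewer note (not part of the patch): every Address-based ld_*/st_*/fld_*/fst_* overload in assembler_loongarch.cpp above follows the same expansion. A displacement that passes is_simm(disp, 12) is encoded directly; for word/dword accesses a displacement that passes the is_simm(disp, 16) && !(disp & 3) check uses the ldptr_*/stptr_* form; anything larger is first materialized into the scratch register AT with lu12i_w + ori and the access is then issued through the indexed ldx_*/stx_* form. The standalone sketch below is illustrative only: split_low20/split_low12/is_simm are reimplemented locally to mirror their declarations later in this header, materialize_disp is a hypothetical helper name, and the behaviour of lu12i_w (20-bit operand placed in bits [31:12], result sign-extended) is an assumption stated here rather than something this sketch verifies against hardware.

    #include <cassert>
    #include <cstdio>

    // Local reimplementations for illustration; they mirror the helpers
    // declared later in assembler_loongarch.hpp.
    static int split_low20(int x) { return x & 0xfffff; } // bits [31:12] of disp
    static int split_low12(int x) { return x & 0xfff;   } // bits [11:0]  of disp
    static bool is_simm(int x, int nbits) {
      const int min      = -(1 << (nbits - 1));
      const int maxplus1 =  (1 << (nbits - 1));
      return min <= x && x < maxplus1;
    }

    // Value the scratch register AT would hold after
    //   lu12i_w(AT, split_low20(disp >> 12));
    //   if (split_low12(disp)) ori(AT, AT, split_low12(disp));
    // assuming lu12i_w puts its 20-bit operand in bits [31:12].
    static int materialize_disp(int disp) {
      unsigned hi = (unsigned)split_low20(disp >> 12) << 12;
      unsigned lo = (unsigned)split_low12(disp);
      return (int)(hi | lo);
    }

    int main() {
      const int disps[] = { 0, 8, 2047, 2048, -2048, -2049, 0x12345678, -0x12345678 };
      for (int d : disps) {
        if (is_simm(d, 12)) {
          printf("disp %d fits simm12 -> single ld_*/st_* instruction\n", d);
        } else {
          assert(materialize_disp(d) == d);  // the hi20/lo12 split loses nothing
          printf("disp %d -> lu12i_w(0x%x) [+ ori(0x%x)] then ldx_*/stx_*\n",
                 d, split_low20(d >> 12), split_low12(d));
        }
      }
      return 0;
    }

The round trip holds for negative displacements as well, because disp >> 12 is an arithmetic shift and the re-assembled value keeps the same low 32 bits; this is why the emitters above never need a separate sign-fixup step.
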
+ +class ArrayAddress; + +class Address VALUE_OBJ_CLASS_SPEC { + public: + enum ScaleFactor { + no_scale = -1, + times_1 = 0, + times_2 = 1, + times_4 = 2, + times_8 = 3, + times_ptr = times_8 + }; + static ScaleFactor times(int size) { + assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); + if (size == 8) return times_8; + if (size == 4) return times_4; + if (size == 2) return times_2; + return times_1; + } + + private: + Register _base; + Register _index; + ScaleFactor _scale; + int _disp; + RelocationHolder _rspec; + + // Easily misused constructors make them private + Address(address loc, RelocationHolder spec); + Address(int disp, address loc, relocInfo::relocType rtype); + Address(int disp, address loc, RelocationHolder spec); + + public: + + // creation + Address() + : _base(noreg), + _index(noreg), + _scale(no_scale), + _disp(0) { + } + + // No default displacement otherwise Register can be implicitly + // converted to 0(Register) which is quite a different animal. + + Address(Register base, int disp = 0) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(disp) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, int disp = 0) + : _base (base), + _index(index), + _scale(scale), + _disp (disp) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } + + // The following two overloads are used in connection with the + // ByteSize type (see sizes.hpp). They simplify the use of + // ByteSize'd arguments in assembly code. Note that their equivalent + // for the optimized build are the member functions with int disp + // argument since ByteSize is mapped to an int type in that case. + // + // Note: DO NOT introduce similar overloaded functions for WordSize + // arguments as in the optimized mode, both ByteSize and WordSize + // are mapped to the same type and thus the compiler cannot make a + // distinction anymore (=> compiler errors). + +#ifdef ASSERT + Address(Register base, ByteSize disp) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(in_bytes(disp)) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, ByteSize disp) + : _base(base), + _index(index), + _scale(scale), + _disp(in_bytes(disp)) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } +#endif // ASSERT + + // accessors + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + int disp() const { return _disp; } + + static Address make_array(ArrayAddress); + + friend class Assembler; + friend class MacroAssembler; + friend class LIR_Assembler; // base/index/scale/disp +}; + +// Calling convention +class Argument VALUE_OBJ_CLASS_SPEC { + public: + enum { + n_register_parameters = 8, // 8 integer registers used to pass parameters + n_float_register_parameters = 8 // 8 float registers used to pass parameters + }; +}; + +// +// AddressLiteral has been split out from Address because operands of this type +// need to be treated specially on 32bit vs. 64bit platforms. 
By splitting it out +// the few instructions that need to deal with address literals are unique and the +// MacroAssembler does not have to implement every instruction in the Assembler +// in order to search for address literals that may need special handling depending +// on the instruction and the platform. As small step on the way to merging i486/amd64 +// directories. +// +class AddressLiteral VALUE_OBJ_CLASS_SPEC { + friend class ArrayAddress; + RelocationHolder _rspec; + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of the item. + // We provide a special factory for making those lvals. + bool _is_lval; + + // If the target is far we'll need to load the ea of this to + // a register to reach it. Otherwise if near we can do rip + // relative addressing. + + address _target; + + protected: + // creation + AddressLiteral() + : _is_lval(false), + _target(NULL) + {} + + public: + + + AddressLiteral(address target, relocInfo::relocType rtype); + + AddressLiteral(address target, RelocationHolder const& rspec) + : _rspec(rspec), + _is_lval(false), + _target(target) + {} + // 32-bit complains about a multiple declaration for int*. + AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) + : _target((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral addr() { + AddressLiteral ret = *this; + ret._is_lval = true; + return ret; + } + + + private: + + address target() { return _target; } + bool is_lval() { return _is_lval; } + + relocInfo::relocType reloc() const { return _rspec.type(); } + const RelocationHolder& rspec() const { return _rspec; } + + friend class Assembler; + friend class MacroAssembler; + friend class Address; + friend class LIR_Assembler; + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); + case relocInfo::poll_type: + case relocInfo::poll_return_type: + return Relocation::spec_simple(rtype); + case relocInfo::none: + case relocInfo::oop_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. 
+ return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + +}; + +// Convience classes +class RuntimeAddress: public AddressLiteral { + + public: + + RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} + +}; + +class OopAddress: public AddressLiteral { + + public: + + OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} + +}; + +class ExternalAddress: public AddressLiteral { + + public: + + ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} + +}; + +class InternalAddress: public AddressLiteral { + + public: + + InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} + +}; + +// x86 can do array addressing as a single operation since disp can be an absolute +// address amd64 can't. We create a class that expresses the concept but does extra +// magic on amd64 to get the final result + +class ArrayAddress VALUE_OBJ_CLASS_SPEC { + private: + + AddressLiteral _base; + Address _index; + + public: + + ArrayAddress() {}; + ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; + AddressLiteral base() { return _base; } + Address index() { return _index; } + +}; + +// The LoongArch Assembler: Pure assembler doing NO optimizations on the instruction +// level ; i.e., what you write is what you get. The Assembler is generating code into +// a CodeBuffer. + +class Assembler : public AbstractAssembler { + friend class AbstractAssembler; // for the non-virtual hack + friend class LIR_Assembler; // as_Address() + friend class StubGenerator; + + public: + // 22-bit opcode, highest 22 bits: bits[31...10] + enum ops22 { + clo_w_op = 0b0000000000000000000100, + clz_w_op = 0b0000000000000000000101, + cto_w_op = 0b0000000000000000000110, + ctz_w_op = 0b0000000000000000000111, + clo_d_op = 0b0000000000000000001000, + clz_d_op = 0b0000000000000000001001, + cto_d_op = 0b0000000000000000001010, + ctz_d_op = 0b0000000000000000001011, + revb_2h_op = 0b0000000000000000001100, + revb_4h_op = 0b0000000000000000001101, + revb_2w_op = 0b0000000000000000001110, + revb_d_op = 0b0000000000000000001111, + revh_2w_op = 0b0000000000000000010000, + revh_d_op = 0b0000000000000000010001, + bitrev_4b_op = 0b0000000000000000010010, + bitrev_8b_op = 0b0000000000000000010011, + bitrev_w_op = 0b0000000000000000010100, + bitrev_d_op = 0b0000000000000000010101, + ext_w_h_op = 0b0000000000000000010110, + ext_w_b_op = 0b0000000000000000010111, + rdtimel_w_op = 0b0000000000000000011000, + rdtimeh_w_op = 0b0000000000000000011001, + rdtime_d_op = 0b0000000000000000011010, + cpucfg_op = 0b0000000000000000011011, + fabs_s_op = 0b0000000100010100000001, + fabs_d_op = 0b0000000100010100000010, + fneg_s_op = 0b0000000100010100000101, + fneg_d_op = 0b0000000100010100000110, + flogb_s_op = 0b0000000100010100001001, + flogb_d_op = 0b0000000100010100001010, + fclass_s_op = 0b0000000100010100001101, + fclass_d_op = 0b0000000100010100001110, + fsqrt_s_op = 0b0000000100010100010001, + fsqrt_d_op = 0b0000000100010100010010, + frecip_s_op = 0b0000000100010100010101, + frecip_d_op = 0b0000000100010100010110, + frsqrt_s_op = 0b0000000100010100011001, + frsqrt_d_op = 0b0000000100010100011010, + fmov_s_op = 0b0000000100010100100101, + fmov_d_op = 0b0000000100010100100110, + movgr2fr_w_op = 0b0000000100010100101001, + movgr2fr_d_op = 0b0000000100010100101010, + movgr2frh_w_op = 0b0000000100010100101011, + movfr2gr_s_op = 0b0000000100010100101101, + 
movfr2gr_d_op = 0b0000000100010100101110, + movfrh2gr_s_op = 0b0000000100010100101111, + movgr2fcsr_op = 0b0000000100010100110000, + movfcsr2gr_op = 0b0000000100010100110010, + movfr2cf_op = 0b0000000100010100110100, + movcf2fr_op = 0b0000000100010100110101, + movgr2cf_op = 0b0000000100010100110110, + movcf2gr_op = 0b0000000100010100110111, + fcvt_s_d_op = 0b0000000100011001000110, + fcvt_d_s_op = 0b0000000100011001001001, + ftintrm_w_s_op = 0b0000000100011010000001, + ftintrm_w_d_op = 0b0000000100011010000010, + ftintrm_l_s_op = 0b0000000100011010001001, + ftintrm_l_d_op = 0b0000000100011010001010, + ftintrp_w_s_op = 0b0000000100011010010001, + ftintrp_w_d_op = 0b0000000100011010010010, + ftintrp_l_s_op = 0b0000000100011010011001, + ftintrp_l_d_op = 0b0000000100011010011010, + ftintrz_w_s_op = 0b0000000100011010100001, + ftintrz_w_d_op = 0b0000000100011010100010, + ftintrz_l_s_op = 0b0000000100011010101001, + ftintrz_l_d_op = 0b0000000100011010101010, + ftintrne_w_s_op = 0b0000000100011010110001, + ftintrne_w_d_op = 0b0000000100011010110010, + ftintrne_l_s_op = 0b0000000100011010111001, + ftintrne_l_d_op = 0b0000000100011010111010, + ftint_w_s_op = 0b0000000100011011000001, + ftint_w_d_op = 0b0000000100011011000010, + ftint_l_s_op = 0b0000000100011011001001, + ftint_l_d_op = 0b0000000100011011001010, + ffint_s_w_op = 0b0000000100011101000100, + ffint_s_l_op = 0b0000000100011101000110, + ffint_d_w_op = 0b0000000100011101001000, + ffint_d_l_op = 0b0000000100011101001010, + frint_s_op = 0b0000000100011110010001, + frint_d_op = 0b0000000100011110010010, + iocsrrd_b_op = 0b0000011001001000000000, + iocsrrd_h_op = 0b0000011001001000000001, + iocsrrd_w_op = 0b0000011001001000000010, + iocsrrd_d_op = 0b0000011001001000000011, + iocsrwr_b_op = 0b0000011001001000000100, + iocsrwr_h_op = 0b0000011001001000000101, + iocsrwr_w_op = 0b0000011001001000000110, + iocsrwr_d_op = 0b0000011001001000000111, + vpcnt_b_op = 0b0111001010011100001000, + vpcnt_h_op = 0b0111001010011100001001, + vpcnt_w_op = 0b0111001010011100001010, + vpcnt_d_op = 0b0111001010011100001011, + vneg_b_op = 0b0111001010011100001100, + vneg_h_op = 0b0111001010011100001101, + vneg_w_op = 0b0111001010011100001110, + vneg_d_op = 0b0111001010011100001111, + vfclass_s_op = 0b0111001010011100110101, + vfclass_d_op = 0b0111001010011100110110, + vfsqrt_s_op = 0b0111001010011100111001, + vfsqrt_d_op = 0b0111001010011100111010, + vfrint_s_op = 0b0111001010011101001101, + vfrint_d_op = 0b0111001010011101001110, + vfrintrm_s_op = 0b0111001010011101010001, + vfrintrm_d_op = 0b0111001010011101010010, + vfrintrp_s_op = 0b0111001010011101010101, + vfrintrp_d_op = 0b0111001010011101010110, + vfrintrz_s_op = 0b0111001010011101011001, + vfrintrz_d_op = 0b0111001010011101011010, + vfrintrne_s_op = 0b0111001010011101011101, + vfrintrne_d_op = 0b0111001010011101011110, + vfcvtl_s_h_op = 0b0111001010011101111010, + vfcvth_s_h_op = 0b0111001010011101111011, + vfcvtl_d_s_op = 0b0111001010011101111100, + vfcvth_d_s_op = 0b0111001010011101111101, + vffint_s_w_op = 0b0111001010011110000000, + vffint_s_wu_op = 0b0111001010011110000001, + vffint_d_l_op = 0b0111001010011110000010, + vffint_d_lu_op = 0b0111001010011110000011, + vffintl_d_w_op = 0b0111001010011110000100, + vffinth_d_w_op = 0b0111001010011110000101, + vftint_w_s_op = 0b0111001010011110001100, + vftint_l_d_op = 0b0111001010011110001101, + vftintrm_w_s_op = 0b0111001010011110001110, + vftintrm_l_d_op = 0b0111001010011110001111, + vftintrp_w_s_op = 0b0111001010011110010000, + vftintrp_l_d_op = 
0b0111001010011110010001, + vftintrz_w_s_op = 0b0111001010011110010010, + vftintrz_l_d_op = 0b0111001010011110010011, + vftintrne_w_s_op = 0b0111001010011110010100, + vftintrne_l_d_op = 0b0111001010011110010101, + vftint_wu_s = 0b0111001010011110010110, + vftint_lu_d = 0b0111001010011110010111, + vftintrz_wu_f = 0b0111001010011110011100, + vftintrz_lu_d = 0b0111001010011110011101, + vftintl_l_s_op = 0b0111001010011110100000, + vftinth_l_s_op = 0b0111001010011110100001, + vftintrml_l_s_op = 0b0111001010011110100010, + vftintrmh_l_s_op = 0b0111001010011110100011, + vftintrpl_l_s_op = 0b0111001010011110100100, + vftintrph_l_s_op = 0b0111001010011110100101, + vftintrzl_l_s_op = 0b0111001010011110100110, + vftintrzh_l_s_op = 0b0111001010011110100111, + vftintrnel_l_s_op = 0b0111001010011110101000, + vftintrneh_l_s_op = 0b0111001010011110101001, + vreplgr2vr_b_op = 0b0111001010011111000000, + vreplgr2vr_h_op = 0b0111001010011111000001, + vreplgr2vr_w_op = 0b0111001010011111000010, + vreplgr2vr_d_op = 0b0111001010011111000011, + xvpcnt_b_op = 0b0111011010011100001000, + xvpcnt_h_op = 0b0111011010011100001001, + xvpcnt_w_op = 0b0111011010011100001010, + xvpcnt_d_op = 0b0111011010011100001011, + xvneg_b_op = 0b0111011010011100001100, + xvneg_h_op = 0b0111011010011100001101, + xvneg_w_op = 0b0111011010011100001110, + xvneg_d_op = 0b0111011010011100001111, + xvfclass_s_op = 0b0111011010011100110101, + xvfclass_d_op = 0b0111011010011100110110, + xvfsqrt_s_op = 0b0111011010011100111001, + xvfsqrt_d_op = 0b0111011010011100111010, + xvfrint_s_op = 0b0111011010011101001101, + xvfrint_d_op = 0b0111011010011101001110, + xvfrintrm_s_op = 0b0111011010011101010001, + xvfrintrm_d_op = 0b0111011010011101010010, + xvfrintrp_s_op = 0b0111011010011101010101, + xvfrintrp_d_op = 0b0111011010011101010110, + xvfrintrz_s_op = 0b0111011010011101011001, + xvfrintrz_d_op = 0b0111011010011101011010, + xvfrintrne_s_op = 0b0111011010011101011101, + xvfrintrne_d_op = 0b0111011010011101011110, + xvfcvtl_s_h_op = 0b0111011010011101111010, + xvfcvth_s_h_op = 0b0111011010011101111011, + xvfcvtl_d_s_op = 0b0111011010011101111100, + xvfcvth_d_s_op = 0b0111011010011101111101, + xvffint_s_w_op = 0b0111011010011110000000, + xvffint_s_wu_op = 0b0111011010011110000001, + xvffint_d_l_op = 0b0111011010011110000010, + xvffint_d_lu_op = 0b0111011010011110000011, + xvffintl_d_w_op = 0b0111011010011110000100, + xvffinth_d_w_op = 0b0111011010011110000101, + xvftint_w_s_op = 0b0111011010011110001100, + xvftint_l_d_op = 0b0111011010011110001101, + xvftintrm_w_s_op = 0b0111011010011110001110, + xvftintrm_l_d_op = 0b0111011010011110001111, + xvftintrp_w_s_op = 0b0111011010011110010000, + xvftintrp_l_d_op = 0b0111011010011110010001, + xvftintrz_w_s_op = 0b0111011010011110010010, + xvftintrz_l_d_op = 0b0111011010011110010011, + xvftintrne_w_s_op = 0b0111011010011110010100, + xvftintrne_l_d_op = 0b0111011010011110010101, + xvftint_wu_s = 0b0111011010011110010110, + xvftint_lu_d = 0b0111011010011110010111, + xvftintrz_wu_f = 0b0111011010011110011100, + xvftintrz_lu_d = 0b0111011010011110011101, + xvftintl_l_s_op = 0b0111011010011110100000, + xvftinth_l_s_op = 0b0111011010011110100001, + xvftintrml_l_s_op = 0b0111011010011110100010, + xvftintrmh_l_s_op = 0b0111011010011110100011, + xvftintrpl_l_s_op = 0b0111011010011110100100, + xvftintrph_l_s_op = 0b0111011010011110100101, + xvftintrzl_l_s_op = 0b0111011010011110100110, + xvftintrzh_l_s_op = 0b0111011010011110100111, + xvftintrnel_l_s_op = 0b0111011010011110101000, + xvftintrneh_l_s_op = 
0b0111011010011110101001, + xvreplgr2vr_b_op = 0b0111011010011111000000, + xvreplgr2vr_h_op = 0b0111011010011111000001, + xvreplgr2vr_w_op = 0b0111011010011111000010, + xvreplgr2vr_d_op = 0b0111011010011111000011, + vext2xv_h_b_op = 0b0111011010011111000100, + vext2xv_w_b_op = 0b0111011010011111000101, + vext2xv_d_b_op = 0b0111011010011111000110, + vext2xv_w_h_op = 0b0111011010011111000111, + vext2xv_d_h_op = 0b0111011010011111001000, + vext2xv_d_w_op = 0b0111011010011111001001, + vext2xv_hu_bu_op = 0b0111011010011111001010, + vext2xv_wu_bu_op = 0b0111011010011111001011, + vext2xv_du_bu_op = 0b0111011010011111001100, + vext2xv_wu_hu_op = 0b0111011010011111001101, + vext2xv_du_hu_op = 0b0111011010011111001110, + vext2xv_du_wu_op = 0b0111011010011111001111, + xvreplve0_b_op = 0b0111011100000111000000, + xvreplve0_h_op = 0b0111011100000111100000, + xvreplve0_w_op = 0b0111011100000111110000, + xvreplve0_d_op = 0b0111011100000111111000, + xvreplve0_q_op = 0b0111011100000111111100, + + unknow_ops22 = 0b1111111111111111111111 + }; + + // 21-bit opcode, highest 21 bits: bits[31...11] + enum ops21 { + vinsgr2vr_d_op = 0b011100101110101111110, + vpickve2gr_d_op = 0b011100101110111111110, + vpickve2gr_du_op = 0b011100101111001111110, + vreplvei_d_op = 0b011100101111011111110, + + unknow_ops21 = 0b111111111111111111111 + }; + + // 20-bit opcode, highest 20 bits: bits[31...12] + enum ops20 { + vinsgr2vr_w_op = 0b01110010111010111110, + vpickve2gr_w_op = 0b01110010111011111110, + vpickve2gr_wu_op = 0b01110010111100111110, + vreplvei_w_op = 0b01110010111101111110, + xvinsgr2vr_d_op = 0b01110110111010111110, + xvpickve2gr_d_op = 0b01110110111011111110, + xvpickve2gr_du_op = 0b01110110111100111110, + xvinsve0_d_op = 0b01110110111111111110, + xvpickve_d_op = 0b01110111000000111110, + + unknow_ops20 = 0b11111111111111111111 + }; + + // 19-bit opcode, highest 19 bits: bits[31...13] + enum ops19 { + vrotri_b_op = 0b0111001010100000001, + vinsgr2vr_h_op = 0b0111001011101011110, + vpickve2gr_h_op = 0b0111001011101111110, + vpickve2gr_hu_op = 0b0111001011110011110, + vreplvei_h_op = 0b0111001011110111110, + vbitclri_b_op = 0b0111001100010000001, + vbitseti_b_op = 0b0111001100010100001, + vbitrevi_b_op = 0b0111001100011000001, + vslli_b_op = 0b0111001100101100001, + vsrli_b_op = 0b0111001100110000001, + vsrai_b_op = 0b0111001100110100001, + xvrotri_b_op = 0b0111011010100000001, + xvinsgr2vr_w_op = 0b0111011011101011110, + xvpickve2gr_w_op = 0b0111011011101111110, + xvpickve2gr_wu_op = 0b0111011011110011110, + xvinsve0_w_op = 0b0111011011111111110, + xvpickve_w_op = 0b0111011100000011110, + xvbitclri_b_op = 0b0111011100010000001, + xvbitseti_b_op = 0b0111011100010100001, + xvbitrevi_b_op = 0b0111011100011000001, + xvslli_b_op = 0b0111011100101100001, + xvsrli_b_op = 0b0111011100110000001, + xvsrai_b_op = 0b0111011100110100001, + + unknow_ops19 = 0b1111111111111111111 + }; + + // 18-bit opcode, highest 18 bits: bits[31...14] + enum ops18 { + vrotri_h_op = 0b011100101010000001, + vinsgr2vr_b_op = 0b011100101110101110, + vpickve2gr_b_op = 0b011100101110111110, + vpickve2gr_bu_op = 0b011100101111001110, + vreplvei_b_op = 0b011100101111011110, + vbitclri_h_op = 0b011100110001000001, + vbitseti_h_op = 0b011100110001010001, + vbitrevi_h_op = 0b011100110001100001, + vslli_h_op = 0b011100110010110001, + vsrli_h_op = 0b011100110011000001, + vsrai_h_op = 0b011100110011010001, + vsrlni_b_h_op = 0b011100110100000001, + xvrotri_h_op = 0b011101101010000001, + xvbitclri_h_op = 0b011101110001000001, + xvbitseti_h_op = 
0b011101110001010001, + xvbitrevi_h_op = 0b011101110001100001, + xvslli_h_op = 0b011101110010110001, + xvsrli_h_op = 0b011101110011000001, + xvsrai_h_op = 0b011101110011010001, + + unknow_ops18 = 0b111111111111111111 + }; + + // 17-bit opcode, highest 17 bits: bits[31...15] + enum ops17 { + asrtle_d_op = 0b00000000000000010, + asrtgt_d_op = 0b00000000000000011, + add_w_op = 0b00000000000100000, + add_d_op = 0b00000000000100001, + sub_w_op = 0b00000000000100010, + sub_d_op = 0b00000000000100011, + slt_op = 0b00000000000100100, + sltu_op = 0b00000000000100101, + maskeqz_op = 0b00000000000100110, + masknez_op = 0b00000000000100111, + nor_op = 0b00000000000101000, + and_op = 0b00000000000101001, + or_op = 0b00000000000101010, + xor_op = 0b00000000000101011, + orn_op = 0b00000000000101100, + andn_op = 0b00000000000101101, + sll_w_op = 0b00000000000101110, + srl_w_op = 0b00000000000101111, + sra_w_op = 0b00000000000110000, + sll_d_op = 0b00000000000110001, + srl_d_op = 0b00000000000110010, + sra_d_op = 0b00000000000110011, + rotr_w_op = 0b00000000000110110, + rotr_d_op = 0b00000000000110111, + mul_w_op = 0b00000000000111000, + mulh_w_op = 0b00000000000111001, + mulh_wu_op = 0b00000000000111010, + mul_d_op = 0b00000000000111011, + mulh_d_op = 0b00000000000111100, + mulh_du_op = 0b00000000000111101, + mulw_d_w_op = 0b00000000000111110, + mulw_d_wu_op = 0b00000000000111111, + div_w_op = 0b00000000001000000, + mod_w_op = 0b00000000001000001, + div_wu_op = 0b00000000001000010, + mod_wu_op = 0b00000000001000011, + div_d_op = 0b00000000001000100, + mod_d_op = 0b00000000001000101, + div_du_op = 0b00000000001000110, + mod_du_op = 0b00000000001000111, + crc_w_b_w_op = 0b00000000001001000, + crc_w_h_w_op = 0b00000000001001001, + crc_w_w_w_op = 0b00000000001001010, + crc_w_d_w_op = 0b00000000001001011, + crcc_w_b_w_op = 0b00000000001001100, + crcc_w_h_w_op = 0b00000000001001101, + crcc_w_w_w_op = 0b00000000001001110, + crcc_w_d_w_op = 0b00000000001001111, + break_op = 0b00000000001010100, + fadd_s_op = 0b00000001000000001, + fadd_d_op = 0b00000001000000010, + fsub_s_op = 0b00000001000000101, + fsub_d_op = 0b00000001000000110, + fmul_s_op = 0b00000001000001001, + fmul_d_op = 0b00000001000001010, + fdiv_s_op = 0b00000001000001101, + fdiv_d_op = 0b00000001000001110, + fmax_s_op = 0b00000001000010001, + fmax_d_op = 0b00000001000010010, + fmin_s_op = 0b00000001000010101, + fmin_d_op = 0b00000001000010110, + fmaxa_s_op = 0b00000001000011001, + fmaxa_d_op = 0b00000001000011010, + fmina_s_op = 0b00000001000011101, + fmina_d_op = 0b00000001000011110, + fscaleb_s_op = 0b00000001000100001, + fscaleb_d_op = 0b00000001000100010, + fcopysign_s_op = 0b00000001000100101, + fcopysign_d_op = 0b00000001000100110, + ldx_b_op = 0b00111000000000000, + ldx_h_op = 0b00111000000001000, + ldx_w_op = 0b00111000000010000, + ldx_d_op = 0b00111000000011000, + stx_b_op = 0b00111000000100000, + stx_h_op = 0b00111000000101000, + stx_w_op = 0b00111000000110000, + stx_d_op = 0b00111000000111000, + ldx_bu_op = 0b00111000001000000, + ldx_hu_op = 0b00111000001001000, + ldx_wu_op = 0b00111000001010000, + fldx_s_op = 0b00111000001100000, + fldx_d_op = 0b00111000001101000, + fstx_s_op = 0b00111000001110000, + fstx_d_op = 0b00111000001111000, + vldx_op = 0b00111000010000000, + vstx_op = 0b00111000010001000, + xvldx_op = 0b00111000010010000, + xvstx_op = 0b00111000010011000, + amswap_w_op = 0b00111000011000000, + amswap_d_op = 0b00111000011000001, + amadd_w_op = 0b00111000011000010, + amadd_d_op = 0b00111000011000011, + amand_w_op = 
0b00111000011000100, + amand_d_op = 0b00111000011000101, + amor_w_op = 0b00111000011000110, + amor_d_op = 0b00111000011000111, + amxor_w_op = 0b00111000011001000, + amxor_d_op = 0b00111000011001001, + ammax_w_op = 0b00111000011001010, + ammax_d_op = 0b00111000011001011, + ammin_w_op = 0b00111000011001100, + ammin_d_op = 0b00111000011001101, + ammax_wu_op = 0b00111000011001110, + ammax_du_op = 0b00111000011001111, + ammin_wu_op = 0b00111000011010000, + ammin_du_op = 0b00111000011010001, + amswap_db_w_op = 0b00111000011010010, + amswap_db_d_op = 0b00111000011010011, + amadd_db_w_op = 0b00111000011010100, + amadd_db_d_op = 0b00111000011010101, + amand_db_w_op = 0b00111000011010110, + amand_db_d_op = 0b00111000011010111, + amor_db_w_op = 0b00111000011011000, + amor_db_d_op = 0b00111000011011001, + amxor_db_w_op = 0b00111000011011010, + amxor_db_d_op = 0b00111000011011011, + ammax_db_w_op = 0b00111000011011100, + ammax_db_d_op = 0b00111000011011101, + ammin_db_w_op = 0b00111000011011110, + ammin_db_d_op = 0b00111000011011111, + ammax_db_wu_op = 0b00111000011100000, + ammax_db_du_op = 0b00111000011100001, + ammin_db_wu_op = 0b00111000011100010, + ammin_db_du_op = 0b00111000011100011, + dbar_op = 0b00111000011100100, + ibar_op = 0b00111000011100101, + fldgt_s_op = 0b00111000011101000, + fldgt_d_op = 0b00111000011101001, + fldle_s_op = 0b00111000011101010, + fldle_d_op = 0b00111000011101011, + fstgt_s_op = 0b00111000011101100, + fstgt_d_op = 0b00111000011101101, + fstle_s_op = 0b00111000011101110, + fstle_d_op = 0b00111000011101111, + ldgt_b_op = 0b00111000011110000, + ldgt_h_op = 0b00111000011110001, + ldgt_w_op = 0b00111000011110010, + ldgt_d_op = 0b00111000011110011, + ldle_b_op = 0b00111000011110100, + ldle_h_op = 0b00111000011110101, + ldle_w_op = 0b00111000011110110, + ldle_d_op = 0b00111000011110111, + stgt_b_op = 0b00111000011111000, + stgt_h_op = 0b00111000011111001, + stgt_w_op = 0b00111000011111010, + stgt_d_op = 0b00111000011111011, + stle_b_op = 0b00111000011111100, + stle_h_op = 0b00111000011111101, + stle_w_op = 0b00111000011111110, + stle_d_op = 0b00111000011111111, + vseq_b_op = 0b01110000000000000, + vseq_h_op = 0b01110000000000001, + vseq_w_op = 0b01110000000000010, + vseq_d_op = 0b01110000000000011, + vsle_b_op = 0b01110000000000100, + vsle_h_op = 0b01110000000000101, + vsle_w_op = 0b01110000000000110, + vsle_d_op = 0b01110000000000111, + vsle_bu_op = 0b01110000000001000, + vsle_hu_op = 0b01110000000001001, + vsle_wu_op = 0b01110000000001010, + vsle_du_op = 0b01110000000001011, + vslt_b_op = 0b01110000000001100, + vslt_h_op = 0b01110000000001101, + vslt_w_op = 0b01110000000001110, + vslt_d_op = 0b01110000000001111, + vslt_bu_op = 0b01110000000010000, + vslt_hu_op = 0b01110000000010001, + vslt_wu_op = 0b01110000000010010, + vslt_du_op = 0b01110000000010011, + vadd_b_op = 0b01110000000010100, + vadd_h_op = 0b01110000000010101, + vadd_w_op = 0b01110000000010110, + vadd_d_op = 0b01110000000010111, + vsub_b_op = 0b01110000000011000, + vsub_h_op = 0b01110000000011001, + vsub_w_op = 0b01110000000011010, + vsub_d_op = 0b01110000000011011, + vabsd_b_op = 0b01110000011000000, + vabsd_h_op = 0b01110000011000001, + vabsd_w_op = 0b01110000011000010, + vabsd_d_op = 0b01110000011000011, + vmax_b_op = 0b01110000011100000, + vmax_h_op = 0b01110000011100001, + vmax_w_op = 0b01110000011100010, + vmax_d_op = 0b01110000011100011, + vmin_b_op = 0b01110000011100100, + vmin_h_op = 0b01110000011100101, + vmin_w_op = 0b01110000011100110, + vmin_d_op = 0b01110000011100111, + vmul_b_op = 
0b01110000100001000, + vmul_h_op = 0b01110000100001001, + vmul_w_op = 0b01110000100001010, + vmul_d_op = 0b01110000100001011, + vmuh_b_op = 0b01110000100001100, + vmuh_h_op = 0b01110000100001101, + vmuh_w_op = 0b01110000100001110, + vmuh_d_op = 0b01110000100001111, + vmuh_bu_op = 0b01110000100010000, + vmuh_hu_op = 0b01110000100010001, + vmuh_wu_op = 0b01110000100010010, + vmuh_du_op = 0b01110000100010011, + vmulwev_h_b_op = 0b01110000100100000, + vmulwev_w_h_op = 0b01110000100100001, + vmulwev_d_w_op = 0b01110000100100010, + vmulwev_q_d_op = 0b01110000100100011, + vmulwod_h_b_op = 0b01110000100100100, + vmulwod_w_h_op = 0b01110000100100101, + vmulwod_d_w_op = 0b01110000100100110, + vmulwod_q_d_op = 0b01110000100100111, + vmadd_b_op = 0b01110000101010000, + vmadd_h_op = 0b01110000101010001, + vmadd_w_op = 0b01110000101010010, + vmadd_d_op = 0b01110000101010011, + vmsub_b_op = 0b01110000101010100, + vmsub_h_op = 0b01110000101010101, + vmsub_w_op = 0b01110000101010110, + vmsub_d_op = 0b01110000101010111, + vsll_b_op = 0b01110000111010000, + vsll_h_op = 0b01110000111010001, + vsll_w_op = 0b01110000111010010, + vsll_d_op = 0b01110000111010011, + vsrl_b_op = 0b01110000111010100, + vsrl_h_op = 0b01110000111010101, + vsrl_w_op = 0b01110000111010110, + vsrl_d_op = 0b01110000111010111, + vsra_b_op = 0b01110000111011000, + vsra_h_op = 0b01110000111011001, + vsra_w_op = 0b01110000111011010, + vsra_d_op = 0b01110000111011011, + vrotr_b_op = 0b01110000111011100, + vrotr_h_op = 0b01110000111011101, + vrotr_w_op = 0b01110000111011110, + vrotr_d_op = 0b01110000111011111, + vbitclr_b_op = 0b01110001000011000, + vbitclr_h_op = 0b01110001000011001, + vbitclr_w_op = 0b01110001000011010, + vbitclr_d_op = 0b01110001000011011, + vbitset_b_op = 0b01110001000011100, + vbitset_h_op = 0b01110001000011101, + vbitset_w_op = 0b01110001000011110, + vbitset_d_op = 0b01110001000011111, + vbitrev_b_op = 0b01110001000100000, + vbitrev_h_op = 0b01110001000100001, + vbitrev_w_op = 0b01110001000100010, + vbitrev_d_op = 0b01110001000100011, + vand_v_op = 0b01110001001001100, + vor_v_op = 0b01110001001001101, + vxor_v_op = 0b01110001001001110, + vnor_v_op = 0b01110001001001111, + vandn_v_op = 0b01110001001010000, + vorn_v_op = 0b01110001001010001, + vadd_q_op = 0b01110001001011010, + vsub_q_op = 0b01110001001011011, + vfadd_s_op = 0b01110001001100001, + vfadd_d_op = 0b01110001001100010, + vfsub_s_op = 0b01110001001100101, + vfsub_d_op = 0b01110001001100110, + vfmul_s_op = 0b01110001001110001, + vfmul_d_op = 0b01110001001110010, + vfdiv_s_op = 0b01110001001110101, + vfdiv_d_op = 0b01110001001110110, + vfmax_s_op = 0b01110001001111001, + vfmax_d_op = 0b01110001001111010, + vfmin_s_op = 0b01110001001111101, + vfmin_d_op = 0b01110001001111110, + vfcvt_h_s_op = 0b01110001010001100, + vfcvt_s_d_op = 0b01110001010001101, + vffint_s_l_op = 0b01110001010010000, + vftint_w_d_op = 0b01110001010010011, + vftintrm_w_d_op = 0b01110001010010100, + vftintrp_w_d_op = 0b01110001010010101, + vftintrz_w_d_op = 0b01110001010010110, + vftintrne_w_d_op = 0b01110001010010111, + vshuf_h_op = 0b01110001011110101, + vshuf_w_op = 0b01110001011110110, + vshuf_d_op = 0b01110001011110111, + vslti_bu_op = 0b01110010100010000, + vslti_hu_op = 0b01110010100010001, + vslti_wu_op = 0b01110010100010010, + vslti_du_op = 0b01110010100010011, + vaddi_bu_op = 0b01110010100010100, + vaddi_hu_op = 0b01110010100010101, + vaddi_wu_op = 0b01110010100010110, + vaddi_du_op = 0b01110010100010111, + vsubi_bu_op = 0b01110010100011000, + vsubi_hu_op = 0b01110010100011001, + 
vsubi_wu_op = 0b01110010100011010, + vsubi_du_op = 0b01110010100011011, + vrotri_w_op = 0b01110010101000001, + vbitclri_w_op = 0b01110011000100001, + vbitseti_w_op = 0b01110011000101001, + vbitrevi_w_op = 0b01110011000110001, + vslli_w_op = 0b01110011001011001, + vsrli_w_op = 0b01110011001100001, + vsrai_w_op = 0b01110011001101001, + vsrlni_h_w_op = 0b01110011010000001, + xvseq_b_op = 0b01110100000000000, + xvseq_h_op = 0b01110100000000001, + xvseq_w_op = 0b01110100000000010, + xvseq_d_op = 0b01110100000000011, + xvsle_b_op = 0b01110100000000100, + xvsle_h_op = 0b01110100000000101, + xvsle_w_op = 0b01110100000000110, + xvsle_d_op = 0b01110100000000111, + xvsle_bu_op = 0b01110100000001000, + xvsle_hu_op = 0b01110100000001001, + xvsle_wu_op = 0b01110100000001010, + xvsle_du_op = 0b01110100000001011, + xvslt_b_op = 0b01110100000001100, + xvslt_h_op = 0b01110100000001101, + xvslt_w_op = 0b01110100000001110, + xvslt_d_op = 0b01110100000001111, + xvslt_bu_op = 0b01110100000010000, + xvslt_hu_op = 0b01110100000010001, + xvslt_wu_op = 0b01110100000010010, + xvslt_du_op = 0b01110100000010011, + xvadd_b_op = 0b01110100000010100, + xvadd_h_op = 0b01110100000010101, + xvadd_w_op = 0b01110100000010110, + xvadd_d_op = 0b01110100000010111, + xvsub_b_op = 0b01110100000011000, + xvsub_h_op = 0b01110100000011001, + xvsub_w_op = 0b01110100000011010, + xvsub_d_op = 0b01110100000011011, + xvabsd_b_op = 0b01110100011000000, + xvabsd_h_op = 0b01110100011000001, + xvabsd_w_op = 0b01110100011000010, + xvabsd_d_op = 0b01110100011000011, + xvmax_b_op = 0b01110100011100000, + xvmax_h_op = 0b01110100011100001, + xvmax_w_op = 0b01110100011100010, + xvmax_d_op = 0b01110100011100011, + xvmin_b_op = 0b01110100011100100, + xvmin_h_op = 0b01110100011100101, + xvmin_w_op = 0b01110100011100110, + xvmin_d_op = 0b01110100011100111, + xvmul_b_op = 0b01110100100001000, + xvmul_h_op = 0b01110100100001001, + xvmul_w_op = 0b01110100100001010, + xvmul_d_op = 0b01110100100001011, + xvmuh_b_op = 0b01110100100001100, + xvmuh_h_op = 0b01110100100001101, + xvmuh_w_op = 0b01110100100001110, + xvmuh_d_op = 0b01110100100001111, + xvmuh_bu_op = 0b01110100100010000, + xvmuh_hu_op = 0b01110100100010001, + xvmuh_wu_op = 0b01110100100010010, + xvmuh_du_op = 0b01110100100010011, + xvmulwev_h_b_op = 0b01110100100100000, + xvmulwev_w_h_op = 0b01110100100100001, + xvmulwev_d_w_op = 0b01110100100100010, + xvmulwev_q_d_op = 0b01110100100100011, + xvmulwod_h_b_op = 0b01110100100100100, + xvmulwod_w_h_op = 0b01110100100100101, + xvmulwod_d_w_op = 0b01110100100100110, + xvmulwod_q_d_op = 0b01110100100100111, + xvmadd_b_op = 0b01110100101010000, + xvmadd_h_op = 0b01110100101010001, + xvmadd_w_op = 0b01110100101010010, + xvmadd_d_op = 0b01110100101010011, + xvmsub_b_op = 0b01110100101010100, + xvmsub_h_op = 0b01110100101010101, + xvmsub_w_op = 0b01110100101010110, + xvmsub_d_op = 0b01110100101010111, + xvsll_b_op = 0b01110100111010000, + xvsll_h_op = 0b01110100111010001, + xvsll_w_op = 0b01110100111010010, + xvsll_d_op = 0b01110100111010011, + xvsrl_b_op = 0b01110100111010100, + xvsrl_h_op = 0b01110100111010101, + xvsrl_w_op = 0b01110100111010110, + xvsrl_d_op = 0b01110100111010111, + xvsra_b_op = 0b01110100111011000, + xvsra_h_op = 0b01110100111011001, + xvsra_w_op = 0b01110100111011010, + xvsra_d_op = 0b01110100111011011, + xvrotr_b_op = 0b01110100111011100, + xvrotr_h_op = 0b01110100111011101, + xvrotr_w_op = 0b01110100111011110, + xvrotr_d_op = 0b01110100111011111, + xvbitclr_b_op = 0b01110101000011000, + xvbitclr_h_op = 0b01110101000011001, + 
xvbitclr_w_op = 0b01110101000011010, + xvbitclr_d_op = 0b01110101000011011, + xvbitset_b_op = 0b01110101000011100, + xvbitset_h_op = 0b01110101000011101, + xvbitset_w_op = 0b01110101000011110, + xvbitset_d_op = 0b01110101000011111, + xvbitrev_b_op = 0b01110101000100000, + xvbitrev_h_op = 0b01110101000100001, + xvbitrev_w_op = 0b01110101000100010, + xvbitrev_d_op = 0b01110101000100011, + xvand_v_op = 0b01110101001001100, + xvor_v_op = 0b01110101001001101, + xvxor_v_op = 0b01110101001001110, + xvnor_v_op = 0b01110101001001111, + xvandn_v_op = 0b01110101001010000, + xvorn_v_op = 0b01110101001010001, + xvadd_q_op = 0b01110101001011010, + xvsub_q_op = 0b01110101001011011, + xvfadd_s_op = 0b01110101001100001, + xvfadd_d_op = 0b01110101001100010, + xvfsub_s_op = 0b01110101001100101, + xvfsub_d_op = 0b01110101001100110, + xvfmul_s_op = 0b01110101001110001, + xvfmul_d_op = 0b01110101001110010, + xvfdiv_s_op = 0b01110101001110101, + xvfdiv_d_op = 0b01110101001110110, + xvfmax_s_op = 0b01110101001111001, + xvfmax_d_op = 0b01110101001111010, + xvfmin_s_op = 0b01110101001111101, + xvfmin_d_op = 0b01110101001111110, + xvfcvt_h_s_op = 0b01110101010001100, + xvfcvt_s_d_op = 0b01110101010001101, + xvffint_s_l_op = 0b01110101010010000, + xvftint_w_d_op = 0b01110101010010011, + xvftintrm_w_d_op = 0b01110101010010100, + xvftintrp_w_d_op = 0b01110101010010101, + xvftintrz_w_d_op = 0b01110101010010110, + xvftintrne_w_d_op = 0b01110101010010111, + xvshuf_h_op = 0b01110101011110101, + xvshuf_w_op = 0b01110101011110110, + xvshuf_d_op = 0b01110101011110111, + xvperm_w_op = 0b01110101011111010, + xvslti_bu_op = 0b01110110100010000, + xvslti_hu_op = 0b01110110100010001, + xvslti_wu_op = 0b01110110100010010, + xvslti_du_op = 0b01110110100010011, + xvaddi_bu_op = 0b01110110100010100, + xvaddi_hu_op = 0b01110110100010101, + xvaddi_wu_op = 0b01110110100010110, + xvaddi_du_op = 0b01110110100010111, + xvsubi_bu_op = 0b01110110100011000, + xvsubi_hu_op = 0b01110110100011001, + xvsubi_wu_op = 0b01110110100011010, + xvsubi_du_op = 0b01110110100011011, + xvrotri_w_op = 0b01110110101000001, + xvbitclri_w_op = 0b01110111000100001, + xvbitseti_w_op = 0b01110111000101001, + xvbitrevi_w_op = 0b01110111000110001, + xvslli_w_op = 0b01110111001011001, + xvsrli_w_op = 0b01110111001100001, + xvsrai_w_op = 0b01110111001101001, + + unknow_ops17 = 0b11111111111111111 + }; + + // 16-bit opcode, highest 16 bits: bits[31...16] + enum ops16 { + vrotri_d_op = 0b0111001010100001, + vbitclri_d_op = 0b0111001100010001, + vbitseti_d_op = 0b0111001100010101, + vbitrevi_d_op = 0b0111001100011001, + vslli_d_op = 0b0111001100101101, + vsrli_d_op = 0b0111001100110001, + vsrai_d_op = 0b0111001100110101, + vsrlni_w_d_op = 0b0111001101000001, + xvrotri_d_op = 0b0111011010100001, + xvbitclri_d_op = 0b0111011100010001, + xvbitseti_d_op = 0b0111011100010101, + xvbitrevi_d_op = 0b0111011100011001, + xvslli_d_op = 0b0111011100101101, + xvsrli_d_op = 0b0111011100110001, + xvsrai_d_op = 0b0111011100110101, + + unknow_ops16 = 0b1111111111111111 + }; + + // 15-bit opcode, highest 15 bits: bits[31...17] + enum ops15 { + vsrlni_d_q_op = 0b011100110100001, + + unknow_ops15 = 0b111111111111111 + }; + + // 14-bit opcode, highest 14 bits: bits[31...18] + enum ops14 { + alsl_w_op = 0b00000000000001, + bytepick_w_op = 0b00000000000010, + bytepick_d_op = 0b00000000000011, + alsl_d_op = 0b00000000001011, + slli_op = 0b00000000010000, + srli_op = 0b00000000010001, + srai_op = 0b00000000010010, + rotri_op = 0b00000000010011, + lddir_op = 0b00000110010000, + ldpte_op = 
0b00000110010001, + vshuf4i_b_op = 0b01110011100100, + vshuf4i_h_op = 0b01110011100101, + vshuf4i_w_op = 0b01110011100110, + vshuf4i_d_op = 0b01110011100111, + vandi_b_op = 0b01110011110100, + vori_b_op = 0b01110011110101, + vxori_b_op = 0b01110011110110, + vnori_b_op = 0b01110011110111, + vldi_op = 0b01110011111000, + vpermi_w_op = 0b01110011111001, + xvshuf4i_b_op = 0b01110111100100, + xvshuf4i_h_op = 0b01110111100101, + xvshuf4i_w_op = 0b01110111100110, + xvshuf4i_d_op = 0b01110111100111, + xvandi_b_op = 0b01110111110100, + xvori_b_op = 0b01110111110101, + xvxori_b_op = 0b01110111110110, + xvnori_b_op = 0b01110111110111, + xvldi_op = 0b01110111111000, + xvpermi_w_op = 0b01110111111001, + xvpermi_d_op = 0b01110111111010, + xvpermi_q_op = 0b01110111111011, + + unknow_ops14 = 0b11111111111111 + }; + + // 12-bit opcode, highest 12 bits: bits[31...20] + enum ops12 { + fmadd_s_op = 0b000010000001, + fmadd_d_op = 0b000010000010, + fmsub_s_op = 0b000010000101, + fmsub_d_op = 0b000010000110, + fnmadd_s_op = 0b000010001001, + fnmadd_d_op = 0b000010001010, + fnmsub_s_op = 0b000010001101, + fnmsub_d_op = 0b000010001110, + vfmadd_s_op = 0b000010010001, + vfmadd_d_op = 0b000010010010, + vfmsub_s_op = 0b000010010101, + vfmsub_d_op = 0b000010010110, + vfnmadd_s_op = 0b000010011001, + vfnmadd_d_op = 0b000010011010, + vfnmsub_s_op = 0b000010011101, + vfnmsub_d_op = 0b000010011110, + xvfmadd_s_op = 0b000010100001, + xvfmadd_d_op = 0b000010100010, + xvfmsub_s_op = 0b000010100101, + xvfmsub_d_op = 0b000010100110, + xvfnmadd_s_op = 0b000010101001, + xvfnmadd_d_op = 0b000010101010, + xvfnmsub_s_op = 0b000010101101, + xvfnmsub_d_op = 0b000010101110, + fcmp_cond_s_op = 0b000011000001, + fcmp_cond_d_op = 0b000011000010, + vfcmp_cond_s_op = 0b000011000101, + vfcmp_cond_d_op = 0b000011000110, + xvfcmp_cond_s_op = 0b000011001001, + xvfcmp_cond_d_op = 0b000011001010, + fsel_op = 0b000011010000, + vbitsel_v_op = 0b000011010001, + xvbitsel_v_op = 0b000011010010, + vshuf_b_op = 0b000011010101, + xvshuf_b_op = 0b000011010110, + + unknow_ops12 = 0b111111111111 + }; + + // 10-bit opcode, highest 10 bits: bits[31...22] + enum ops10 { + bstr_w_op = 0b0000000001, + bstrins_d_op = 0b0000000010, + bstrpick_d_op = 0b0000000011, + slti_op = 0b0000001000, + sltui_op = 0b0000001001, + addi_w_op = 0b0000001010, + addi_d_op = 0b0000001011, + lu52i_d_op = 0b0000001100, + andi_op = 0b0000001101, + ori_op = 0b0000001110, + xori_op = 0b0000001111, + ld_b_op = 0b0010100000, + ld_h_op = 0b0010100001, + ld_w_op = 0b0010100010, + ld_d_op = 0b0010100011, + st_b_op = 0b0010100100, + st_h_op = 0b0010100101, + st_w_op = 0b0010100110, + st_d_op = 0b0010100111, + ld_bu_op = 0b0010101000, + ld_hu_op = 0b0010101001, + ld_wu_op = 0b0010101010, + preld_op = 0b0010101011, + fld_s_op = 0b0010101100, + fst_s_op = 0b0010101101, + fld_d_op = 0b0010101110, + fst_d_op = 0b0010101111, + vld_op = 0b0010110000, + vst_op = 0b0010110001, + xvld_op = 0b0010110010, + xvst_op = 0b0010110011, + ldl_w_op = 0b0010111000, + ldr_w_op = 0b0010111001, + + unknow_ops10 = 0b1111111111 + }; + + // 8-bit opcode, highest 8 bits: bits[31...22] + enum ops8 { + ll_w_op = 0b00100000, + sc_w_op = 0b00100001, + ll_d_op = 0b00100010, + sc_d_op = 0b00100011, + ldptr_w_op = 0b00100100, + stptr_w_op = 0b00100101, + ldptr_d_op = 0b00100110, + stptr_d_op = 0b00100111, + + unknow_ops8 = 0b11111111 + }; + + // 7-bit opcode, highest 7 bits: bits[31...25] + enum ops7 { + lu12i_w_op = 0b0001010, + lu32i_d_op = 0b0001011, + pcaddi_op = 0b0001100, + pcalau12i_op = 0b0001101, + pcaddu12i_op = 
0b0001110, + pcaddu18i_op = 0b0001111, + + unknow_ops7 = 0b1111111 + }; + + // 6-bit opcode, highest 6 bits: bits[31...25] + enum ops6 { + addu16i_d_op = 0b000100, + beqz_op = 0b010000, + bnez_op = 0b010001, + bccondz_op = 0b010010, + jirl_op = 0b010011, + b_op = 0b010100, + bl_op = 0b010101, + beq_op = 0b010110, + bne_op = 0b010111, + blt_op = 0b011000, + bge_op = 0b011001, + bltu_op = 0b011010, + bgeu_op = 0b011011, + + unknow_ops6 = 0b111111 + }; + + enum fcmp_cond { + fcmp_caf = 0x00, + fcmp_cun = 0x08, + fcmp_ceq = 0x04, + fcmp_cueq = 0x0c, + fcmp_clt = 0x02, + fcmp_cult = 0x0a, + fcmp_cle = 0x06, + fcmp_cule = 0x0e, + fcmp_cne = 0x10, + fcmp_cor = 0x14, + fcmp_cune = 0x18, + fcmp_saf = 0x01, + fcmp_sun = 0x09, + fcmp_seq = 0x05, + fcmp_sueq = 0x0d, + fcmp_slt = 0x03, + fcmp_sult = 0x0b, + fcmp_sle = 0x07, + fcmp_sule = 0x0f, + fcmp_sne = 0x11, + fcmp_sor = 0x15, + fcmp_sune = 0x19 + }; + + enum Condition { + zero , + notZero , + equal , + notEqual , + less , + lessEqual , + greater , + greaterEqual , + below , + belowEqual , + above , + aboveEqual + }; + + static const int LogInstructionSize = 2; + static const int InstructionSize = 1 << LogInstructionSize; + + enum WhichOperand { + // input to locate_operand, and format code for relocations + imm_operand = 0, // embedded 32-bit|64-bit immediate operand + disp32_operand = 1, // embedded 32-bit displacement or address + call32_operand = 2, // embedded 32-bit self-relative displacement + narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop + _WhichOperand_limit = 4 + }; + + static int low (int x, int l) { return bitfield(x, 0, l); } + static int low16(int x) { return low(x, 16); } + static int low26(int x) { return low(x, 26); } + + static int high (int x, int l) { return bitfield(x, 32-l, l); } + static int high16(int x) { return high(x, 16); } + static int high6 (int x) { return high(x, 6); } + + + protected: + // help methods for instruction ejection + + // 2R-type + // 31 10 9 5 4 0 + // | opcode | rj | rd | + static inline int insn_RR (int op, int rj, int rd) { return (op<<10) | (rj<<5) | rd; } + + // 3R-type + // 31 15 14 10 9 5 4 0 + // | opcode | rk | rj | rd | + static inline int insn_RRR (int op, int rk, int rj, int rd) { return (op<<15) | (rk<<10) | (rj<<5) | rd; } + + // 4R-type + // 31 20 19 15 14 10 9 5 4 0 + // | opcode | ra | rk | rj | rd | + static inline int insn_RRRR (int op, int ra, int rk, int rj, int rd) { return (op<<20) | (ra << 15) | (rk<<10) | (rj<<5) | rd; } + + // 2RI1-type + // 31 11 10 9 5 4 0 + // | opcode | I1 | vj | rd | + static inline int insn_I1RR (int op, int ui1, int vj, int rd) { assert(is_uimm(ui1, 1), "not a unsigned 1-bit int"); return (op<<11) | (low(ui1, 1)<<10) | (vj<<5) | rd; } + + // 2RI2-type + // 31 12 11 10 9 5 4 0 + // | opcode | I2 | vj | rd | + static inline int insn_I2RR (int op, int ui2, int vj, int rd) { assert(is_uimm(ui2, 2), "not a unsigned 2-bit int"); return (op<<12) | (low(ui2, 2)<<10) | (vj<<5) | rd; } + + // 2RI3-type + // 31 13 12 10 9 5 4 0 + // | opcode | I3 | vj | vd | + static inline int insn_I3RR (int op, int ui3, int vj, int vd) { assert(is_uimm(ui3, 3), "not a unsigned 3-bit int"); return (op<<13) | (low(ui3, 3)<<10) | (vj<<5) | vd; } + + // 2RI4-type + // 31 14 13 10 9 5 4 0 + // | opcode | I4 | vj | vd | + static inline int insn_I4RR (int op, int ui4, int vj, int vd) { assert(is_uimm(ui4, 4), "not a unsigned 4-bit int"); return (op<<14) | (low(ui4, 4)<<10) | (vj<<5) | vd; } + + // 2RI5-type + // 31 15 14 10 9 5 4 0 + // | opcode | I5 | vj | vd | + 
static inline int insn_I5RR (int op, int ui5, int vj, int vd) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); return (op<<15) | (low(ui5, 5)<<10) | (vj<<5) | vd; } + + // 2RI6-type + // 31 16 15 10 9 5 4 0 + // | opcode | I6 | vj | vd | + static inline int insn_I6RR (int op, int ui6, int vj, int vd) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); return (op<<16) | (low(ui6, 6)<<10) | (vj<<5) | vd; } + + // 2RI7-type + // 31 17 16 10 9 5 4 0 + // | opcode | I7 | vj | vd | + static inline int insn_I7RR (int op, int ui7, int vj, int vd) { assert(is_uimm(ui7, 7), "not a unsigned 7-bit int"); return (op<<17) | (low(ui7, 7)<<10) | (vj<<5) | vd; } + + // 2RI8-type + // 31 18 17 10 9 5 4 0 + // | opcode | I8 | rj | rd | + static inline int insn_I8RR (int op, int imm8, int rj, int rd) { /*assert(is_simm(imm8, 8), "not a signed 8-bit int");*/ return (op<<18) | (low(imm8, 8)<<10) | (rj<<5) | rd; } + + // 2RI12-type + // 31 22 21 10 9 5 4 0 + // | opcode | I12 | rj | rd | + static inline int insn_I12RR(int op, int imm12, int rj, int rd) { /* assert(is_simm(imm12, 12), "not a signed 12-bit int");*/ return (op<<22) | (low(imm12, 12)<<10) | (rj<<5) | rd; } + + + // 2RI14-type + // 31 24 23 10 9 5 4 0 + // | opcode | I14 | rj | rd | + static inline int insn_I14RR(int op, int imm14, int rj, int rd) { assert(is_simm(imm14, 14), "not a signed 14-bit int"); return (op<<24) | (low(imm14, 14)<<10) | (rj<<5) | rd; } + + // 2RI16-type + // 31 26 25 10 9 5 4 0 + // | opcode | I16 | rj | rd | + static inline int insn_I16RR(int op, int imm16, int rj, int rd) { assert(is_simm16(imm16), "not a signed 16-bit int"); return (op<<26) | (low16(imm16)<<10) | (rj<<5) | rd; } + + // 1RI13-type (?) + // 31 18 17 5 4 0 + // | opcode | I13 | vd | + static inline int insn_I13R (int op, int imm13, int vd) { assert(is_simm(imm13, 13), "not a signed 13-bit int"); return (op<<18) | (low(imm13, 13)<<5) | vd; } + + // 1RI20-type (?) + // 31 25 24 5 4 0 + // | opcode | I20 | rd | + static inline int insn_I20R (int op, int imm20, int rd) { assert(is_simm(imm20, 20), "not a signed 20-bit int"); return (op<<25) | (low(imm20, 20)<<5) | rd; } + + // 1RI21-type + // 31 26 25 10 9 5 4 0 + // | opcode | I21[15:0] | rj |I21[20:16]| + static inline int insn_IRI(int op, int imm21, int rj) { assert(is_simm(imm21, 21), "not a signed 21-bit int"); return (op << 26) | (low16(imm21) << 10) | (rj << 5) | low(imm21 >> 16, 5); } + + // I26-type + // 31 26 25 10 9 0 + // | opcode | I26[15:0] | I26[25:16] | + static inline int insn_I26(int op, int imm26) { assert(is_simm(imm26, 26), "not a signed 26-bit int"); return (op << 26) | (low16(imm26) << 10) | low(imm26 >> 16, 10); } + + // imm15 + // 31 15 14 0 + // | opcode | I15 | + static inline int insn_I15 (int op, int imm15) { assert(is_uimm(imm15, 15), "not a unsigned 15-bit int"); return (op<<15) | low(imm15, 15); } + + + // get the offset field of beq, bne, blt[u], bge[u] instruction + int offset16(address entry) { + assert(is_simm16((entry - pc()) / 4), "change this code"); + if (!is_simm16((entry - pc()) / 4)) { + tty->print_cr("!!! is_simm16: %lx", (entry - pc()) / 4); + } + return (entry - pc()) / 4; + } + + // get the offset field of beqz, bnez instruction + int offset21(address entry) { + assert(is_simm((int)(entry - pc()) / 4, 21), "change this code"); + if (!is_simm((int)(entry - pc()) / 4, 21)) { + tty->print_cr("!!!
is_simm21: %lx", (entry - pc()) / 4); + } + return (entry - pc()) / 4; + } + + // get the offset field of b instruction + int offset26(address entry) { + assert(is_simm((int)(entry - pc()) / 4, 26), "change this code"); + if (!is_simm((int)(entry - pc()) / 4, 26)) { + tty->print_cr("!!! is_simm26: %lx", (entry - pc()) / 4); + } + return (entry - pc()) / 4; + } + +public: + using AbstractAssembler::offset; + + //sign expand with the sign bit is h + static int expand(int x, int h) { return -(x & (1<> 16; + } + + static int split_high16(int x) { + return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; + } + + static int split_low20(int x) { + return (x & 0xfffff); + } + + // Convert 20-bit x to a sign-extended 20-bit integer + static int simm20(int x) { + assert(x == (x & 0xFFFFF), "must be 20-bit only"); + return (x << 12) >> 12; + } + + static int split_low12(int x) { + return (x & 0xfff); + } + + static inline void split_simm38(jlong si38, jint& si18, jint& si20) { + si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14; + si38 += (si38 & 0x20000) << 1; + si20 = si38 >> 18; + } + + // Convert 12-bit x to a sign-extended 12-bit integer + static int simm12(int x) { + assert(x == (x & 0xFFF), "must be 12-bit only"); + return (x << 20) >> 20; + } + + // Convert 26-bit x to a sign-extended 26-bit integer + static int simm26(int x) { + assert(x == (x & 0x3FFFFFF), "must be 26-bit only"); + return (x << 6) >> 6; + } + + static intptr_t merge(intptr_t x0, intptr_t x12) { + //lu12i, ori + return (((x12 << 12) | x0) << 32) >> 32; + } + + static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32) { + //lu32i, lu12i, ori + return (((x32 << 32) | (x12 << 12) | x0) << 12) >> 12; + } + + static intptr_t merge(intptr_t x0, intptr_t x12, intptr_t x32, intptr_t x52) { + //lu52i, lu32i, lu12i, ori + return (x52 << 52) | (x32 << 32) | (x12 << 12) | x0; + } + + // Test if x is within signed immediate range for nbits. + static bool is_simm (int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int min = -( ((int)1) << nbits-1 ); + const int maxplus1 = ( ((int)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + static bool is_simm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong min = -( ((jlong)1) << nbits-1 ); + const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + static bool is_simm16(int x) { return is_simm(x, 16); } + static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } + + // Test if x is within unsigned immediate range for nbits + static bool is_uimm(int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int maxplus1 = ( ((int)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + static bool is_uimm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong maxplus1 = ( ((jlong)1) << nbits ); + return 0 <= x && x < maxplus1; + } + +public: + + void flush() { + AbstractAssembler::flush(); + } + + inline void emit_data(int x) { emit_int32(x); } + inline void emit_data(int x, relocInfo::relocType rtype) { + relocate(rtype); + emit_int32(x); + } + + inline void emit_data(int x, RelocationHolder const& rspec) { + relocate(rspec); + emit_int32(x); + } + + // Generic instructions + // Does 32bit or 64bit as needed for the platform. 
In some sense these + // belong in macro assembler but there is no need for both varieties to exist + + void clo_w (Register rd, Register rj) { emit_int32(insn_RR(clo_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void clz_w (Register rd, Register rj) { emit_int32(insn_RR(clz_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void cto_w (Register rd, Register rj) { emit_int32(insn_RR(cto_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void ctz_w (Register rd, Register rj) { emit_int32(insn_RR(ctz_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void clo_d (Register rd, Register rj) { emit_int32(insn_RR(clo_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void clz_d (Register rd, Register rj) { emit_int32(insn_RR(clz_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void cto_d (Register rd, Register rj) { emit_int32(insn_RR(cto_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void ctz_d (Register rd, Register rj) { emit_int32(insn_RR(ctz_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void revb_2h(Register rd, Register rj) { emit_int32(insn_RR(revb_2h_op, (int)rj->encoding(), (int)rd->encoding())); } + void revb_4h(Register rd, Register rj) { emit_int32(insn_RR(revb_4h_op, (int)rj->encoding(), (int)rd->encoding())); } + void revb_2w(Register rd, Register rj) { emit_int32(insn_RR(revb_2w_op, (int)rj->encoding(), (int)rd->encoding())); } + void revb_d (Register rd, Register rj) { emit_int32(insn_RR( revb_d_op, (int)rj->encoding(), (int)rd->encoding())); } + void revh_2w(Register rd, Register rj) { emit_int32(insn_RR(revh_2w_op, (int)rj->encoding(), (int)rd->encoding())); } + void revh_d (Register rd, Register rj) { emit_int32(insn_RR( revh_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void bitrev_4b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_4b_op, (int)rj->encoding(), (int)rd->encoding())); } + void bitrev_8b(Register rd, Register rj) { emit_int32(insn_RR(bitrev_8b_op, (int)rj->encoding(), (int)rd->encoding())); } + void bitrev_w (Register rd, Register rj) { emit_int32(insn_RR(bitrev_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void bitrev_d (Register rd, Register rj) { emit_int32(insn_RR(bitrev_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void ext_w_h(Register rd, Register rj) { emit_int32(insn_RR(ext_w_h_op, (int)rj->encoding(), (int)rd->encoding())); } + void ext_w_b(Register rd, Register rj) { emit_int32(insn_RR(ext_w_b_op, (int)rj->encoding(), (int)rd->encoding())); } + + void rdtimel_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimel_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void rdtimeh_w(Register rd, Register rj) { emit_int32(insn_RR(rdtimeh_w_op, (int)rj->encoding(), (int)rd->encoding())); } + void rdtime_d(Register rd, Register rj) { emit_int32(insn_RR(rdtime_d_op, (int)rj->encoding(), (int)rd->encoding())); } + + void cpucfg(Register rd, Register rj) { emit_int32(insn_RR(cpucfg_op, (int)rj->encoding(), (int)rd->encoding())); } + + void asrtle_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtle_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } + void asrtgt_d (Register rj, Register rk) { emit_int32(insn_RRR(asrtgt_d_op , (int)rk->encoding(), (int)rj->encoding(), 0)); } + + void alsl_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + void alsl_wu(Register rd, Register rj, Register rk, int 
sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_w_op, ( (1 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + void bytepick_w(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(bytepick_w_op, ( (0 << 7) | (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + void bytepick_d(Register rd, Register rj, Register rk, int sa3) { assert(is_uimm(sa3, 3), "not a unsigned 3-bit int"); emit_int32(insn_I8RR(bytepick_d_op, ( (sa3 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + + void add_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void add_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(add_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sub_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sub_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sub_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void slt (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(slt_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sltu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sltu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void maskeqz (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(maskeqz_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void masknez (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(masknez_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void nor (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(nor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void AND (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(and_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void OR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(or_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void XOR (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(xor_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void orn (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(orn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void andn(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(andn_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void sll_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void srl_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sra_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void sll_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sll_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void srl_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(srl_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + 
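+
+  // Worked example (illustrative sketch; the register numbers below are assumed, not taken from this patch):
+  // every 3R-type emitter in this group packs its operands through insn_RRR, i.e.
+  //   word = (op << 15) | (rk << 10) | (rj << 5) | rd
+  // so add_d with rd = 4, rj = 5, rk = 6 fills the low 15 bits with
+  //   (6 << 10) | (5 << 5) | 4  ==  0b00110'00101'00100  ==  6308
+  // while the opcode value occupies bits [31..15].
+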
void sra_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(sra_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void rotr_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void rotr_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(rotr_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void mul_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mul_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mul_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulh_du (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulh_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulw_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mulw_d_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mulw_d_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void div_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void div_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_wu(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void div_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void div_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(div_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void mod_du(Register rd, Register rj, Register rk) { emit_int32(insn_RRR(mod_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void crc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), 
(int)rd->encoding())); } + void crcc_w_b_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_b_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crcc_w_h_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_h_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crcc_w_w_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_w_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void crcc_w_d_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(crcc_w_d_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void brk(int code) { assert(is_uimm(code, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(break_op, code)); } + + void alsl_d(Register rd, Register rj, Register rk, int sa2) { assert(is_uimm(sa2, 2), "not a unsigned 2-bit int"); emit_int32(insn_I8RR(alsl_d_op, ( (sa2 << 5) | (int)rk->encoding() ), (int)rj->encoding(), (int)rd->encoding())); } + + void slli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void slli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(slli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + void srli_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void srli_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srli_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + void srai_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void srai_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(srai_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + void rotri_w(Register rd, Register rj, int ui5) { assert(is_uimm(ui5, 5), "not a unsigned 5-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b001 << 5) | ui5 ), (int)rj->encoding(), (int)rd->encoding())); } + void rotri_d(Register rd, Register rj, int ui6) { assert(is_uimm(ui6, 6), "not a unsigned 6-bit int"); emit_int32(insn_I8RR(rotri_op, ( (0b01 << 6) | ui6 ), (int)rj->encoding(), (int)rd->encoding())); } + + void bstrins_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (0<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } + void bstrpick_w (Register rd, Register rj, int msbw, int lsbw) { assert(is_uimm(msbw, 5) && is_uimm(lsbw, 5), "not a unsigned 5-bit int"); emit_int32(insn_I12RR(bstr_w_op, ( (1<<11) | (low(msbw, 5)<<6) | (1<<5) | low(lsbw, 5) ), (int)rj->encoding(), (int)rd->encoding())); } + void bstrins_d (Register rd, Register rj, int msbd, int lsbd) { assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrins_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } + void bstrpick_d (Register rd, Register rj, int msbd, int lsbd) { 
assert(is_uimm(msbd, 6) && is_uimm(lsbd, 6), "not a unsigned 6-bit int"); emit_int32(insn_I12RR(bstrpick_d_op, ( (low(msbd, 6)<<6) | low(lsbd, 6) ), (int)rj->encoding(), (int)rd->encoding())); } + + void fadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fadd_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fsub_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmul_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmul_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmul_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fdiv_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fdiv_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fdiv_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmax_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmax_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmax_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmin_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmin_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmin_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmaxa_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmaxa_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmaxa_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmina_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmina_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fmina_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void fscaleb_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fscaleb_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fscaleb_d_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fcopysign_s (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_s_op, (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fcopysign_d (FloatRegister fd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRR(fcopysign_d_op, 
(int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void fabs_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fabs_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fabs_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fneg_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fneg_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fneg_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void flogb_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void flogb_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(flogb_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fclass_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fclass_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fclass_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void frecip_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void frecip_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frecip_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void frsqrt_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void frsqrt_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frsqrt_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fmov_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void fmov_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fmov_d_op, (int)fj->encoding(), (int)fd->encoding())); } + + void movgr2fr_w (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_w_op, (int)rj->encoding(), (int)fd->encoding())); } + void movgr2fr_d (FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2fr_d_op, (int)rj->encoding(), (int)fd->encoding())); } + void movgr2frh_w(FloatRegister fd, Register rj) { emit_int32(insn_RR(movgr2frh_w_op, (int)rj->encoding(), (int)fd->encoding())); } + void movfr2gr_s (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } + void movfr2gr_d (Register rd, FloatRegister fj) { emit_int32(insn_RR(movfr2gr_d_op, (int)fj->encoding(), (int)rd->encoding())); } + void movfrh2gr_s(Register rd, FloatRegister fj) { emit_int32(insn_RR(movfrh2gr_s_op, (int)fj->encoding(), (int)rd->encoding())); } + void movgr2fcsr (int fcsr, Register rj) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movgr2fcsr_op, (int)rj->encoding(), fcsr)); } + void movfcsr2gr (Register rd, int fcsr) { assert(is_uimm(fcsr, 2), "not a unsigned 2-bit init: fcsr0-fcsr3"); emit_int32(insn_RR(movfcsr2gr_op, fcsr, (int)rd->encoding())); } + void movfr2cf (ConditionalFlagRegister cd, FloatRegister fj) { emit_int32(insn_RR(movfr2cf_op, (int)fj->encoding(), (int)cd->encoding())); } + void movcf2fr (FloatRegister fd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2fr_op, 
(int)cj->encoding(), (int)fd->encoding())); } + void movgr2cf (ConditionalFlagRegister cd, Register rj) { emit_int32(insn_RR(movgr2cf_op, (int)rj->encoding(), (int)cd->encoding())); } + void movcf2gr (Register rd, ConditionalFlagRegister cj) { emit_int32(insn_RR(movcf2gr_op, (int)cj->encoding(), (int)rd->encoding())); } + + void fcvt_s_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_s_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void fcvt_d_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(fcvt_d_s_op, (int)fj->encoding(), (int)fd->encoding())); } + + void ftintrm_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrm_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrm_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrm_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrm_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrp_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrp_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrz_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrz_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftintrne_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftintrne_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_w_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_w_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_w_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_l_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void ftint_l_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ftint_l_d_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_s_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_s_w_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_s_l(FloatRegister fd, FloatRegister fj) { 
emit_int32(insn_RR(ffint_s_l_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_d_w(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_w_op, (int)fj->encoding(), (int)fd->encoding())); } + void ffint_d_l(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(ffint_d_l_op, (int)fj->encoding(), (int)fd->encoding())); } + void frint_s(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_s_op, (int)fj->encoding(), (int)fd->encoding())); } + void frint_d(FloatRegister fd, FloatRegister fj) { emit_int32(insn_RR(frint_d_op, (int)fj->encoding(), (int)fd->encoding())); } + + void slti (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(slti_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void sltui (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(sltui_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void addi_w(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void addi_d(Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(addi_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void lu52i_d(Register rd, Register rj, int si12) { /*assert(is_simm(si12, 12), "not a signed 12-bit int");*/ emit_int32(insn_I12RR(lu52i_d_op, simm12(si12), (int)rj->encoding(), (int)rd->encoding())); } + void andi (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(andi_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } + void ori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(ori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } + void xori (Register rd, Register rj, int ui12) { assert(is_uimm(ui12, 12), "not a unsigned 12-bit int"); emit_int32(insn_I12RR(xori_op, ui12, (int)rj->encoding(), (int)rd->encoding())); } + + void fmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmadd_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_s_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmadd_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmadd_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmsub_s (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_s_op , 
(int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + void fnmsub_d (FloatRegister fd, FloatRegister fj, FloatRegister fk, FloatRegister fa) { emit_int32(insn_RRRR(fnmsub_d_op , (int)fa->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void fcmp_caf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_ceq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_clt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cult_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_saf_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sun_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_seq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sueq_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_slt_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sult_s (ConditionalFlagRegister 
cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sle_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sule_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sne_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sor_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sune_s (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_s_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + + void fcmp_caf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_caf, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_ceq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_ceq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_clt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_clt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_cune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_cune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_saf_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_saf , (int)fk->encoding(), (int)fj->encoding(), 
(int)cd->encoding())); } + void fcmp_sun_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sun , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_seq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_seq , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sueq_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sueq, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_slt_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_slt , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sult_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sult, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sle_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sle , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sule_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sule, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sne_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sne , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sor_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sor , (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + void fcmp_sune_d (ConditionalFlagRegister cd, FloatRegister fj, FloatRegister fk) { emit_int32(insn_RRRR(fcmp_cond_d_op, fcmp_sune, (int)fk->encoding(), (int)fj->encoding(), (int)cd->encoding())); } + + void fsel (FloatRegister fd, FloatRegister fj, FloatRegister fk, ConditionalFlagRegister ca) { emit_int32(insn_RRRR(fsel_op, (int)ca->encoding(), (int)fk->encoding(), (int)fj->encoding(), (int)fd->encoding())); } + + void addu16i_d(Register rj, Register rd, int si16) { assert(is_simm(si16, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(addu16i_d_op, si16, (int)rj->encoding(), (int)rd->encoding())); } + + void lu12i_w(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu12i_w_op, simm20(si20), (int)rj->encoding())); } + void lu32i_d(Register rj, int si20) { /*assert(is_simm(si20, 20), "not a signed 20-bit int");*/ emit_int32(insn_I20R(lu32i_d_op, simm20(si20), (int)rj->encoding())); } + void pcaddi(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddi_op, si20, (int)rj->encoding())); } + void pcalau12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcalau12i_op, si20, (int)rj->encoding())); } + void pcaddu12i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu12i_op, si20, (int)rj->encoding())); } + void pcaddu18i(Register rj, int si20) { assert(is_simm(si20, 20), "not a signed 20-bit int"); emit_int32(insn_I20R(pcaddu18i_op, si20, (int)rj->encoding())); } + + void ll_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit 
int"); emit_int32(insn_I14RR(ll_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void sc_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void ll_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ll_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void sc_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(sc_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void ldptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void stptr_w (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_w_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void ldptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(ldptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + void stptr_d (Register rd, Register rj, int si16) { assert(is_simm(si16, 16) && ((si16 & 0x3) == 0), "not a signed 16-bit int"); emit_int32(insn_I14RR(stptr_d_op, si16>>2, (int)rj->encoding(), (int)rd->encoding())); } + + void ld_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_b (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_b_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_h (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_h_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void st_d (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(st_d_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_bu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_bu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_hu (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_hu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ld_wu (Register rd, Register rj, int si12) { 
assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ld_wu_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void preld (int hint, Register rj, int si12) { assert(is_uimm(hint, 5), "not a unsigned 5-bit int"); assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(preld_op, si12, (int)rj->encoding(), hint)); } + void fld_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void fst_s (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_s_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void fld_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fld_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void fst_d (FloatRegister fd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(fst_d_op, si12, (int)rj->encoding(), (int)fd->encoding())); } + void ldl_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldl_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + void ldr_w (Register rd, Register rj, int si12) { assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(ldr_w_op, si12, (int)rj->encoding(), (int)rd->encoding())); } + + void ldx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stx_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_bu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_bu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_hu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_hu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldx_wu (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldx_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fldx_s (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } + void fldx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fldx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } + void fstx_s (FloatRegister fd, 
Register rj, Register rk) { emit_int32(insn_RRR(fstx_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } + void fstx_d (FloatRegister fd, Register rj, Register rk) { emit_int32(insn_RRR(fstx_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)fd->encoding())); } + + void ld_b (Register rd, Address src); + void ld_bu (Register rd, Address src); + void ld_d (Register rd, Address src); + void ld_h (Register rd, Address src); + void ld_hu (Register rd, Address src); + void ll_w (Register rd, Address src); + void ll_d (Register rd, Address src); + void ld_wu (Register rd, Address src); + void ld_w (Register rd, Address src); + void st_b (Register rd, Address dst); + void st_d (Register rd, Address dst); + void st_w (Register rd, Address dst); + void sc_w (Register rd, Address dst); + void sc_d (Register rd, Address dst); + void st_h (Register rd, Address dst); + void fld_s (FloatRegister fd, Address src); + void fld_d (FloatRegister fd, Address src); + void fst_s (FloatRegister fd, Address dst); + void fst_d (FloatRegister fd, Address dst); + + void amswap_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amswap_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amadd_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amadd_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amand_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amand_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amxor_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amxor_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_w_op, (int)rk->encoding(),
(int)rj->encoding(), (int)rd->encoding())); } + void ammax_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_wu (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_du (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amswap_db_w(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amswap_db_d(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amswap_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amadd_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amadd_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amadd_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amand_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amand_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amand_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amor_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amxor_db_w (Register rd, 
Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void amxor_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(amxor_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_db_w (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_db_d (Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammax_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammax_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_db_wu(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_wu_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ammin_db_du(Register rd, Register rk, Register rj) { assert_different_registers(rd, rj); assert_different_registers(rd, rk); emit_int32(insn_RRR(ammin_db_du_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void dbar(int hint) { + assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); + + if (os::is_ActiveCoresMP()) + andi(R0, R0, 0); + else + emit_int32(insn_I15(dbar_op, hint)); + } + void ibar(int hint) { assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); emit_int32(insn_I15(ibar_op, hint)); } + + void fldgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fldle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fstgt_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fstgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } 
+ void fstle_s (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_s_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void fstle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(fstle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void ldgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void ldle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(ldle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stgt_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stgt_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_b (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_b_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_h (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_h_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_w (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_w_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + void stle_d (Register rd, Register rj, Register rk) { emit_int32(insn_RRR(stle_d_op, (int)rk->encoding(), (int)rj->encoding(), (int)rd->encoding())); } + + void beqz(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(beqz_op, offs, (int)rj->encoding())); } + void bnez(Register rj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bnez_op, offs, (int)rj->encoding())); } + void bceqz(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b00<<3) | (int)cj->encoding()))); } + void bcnez(ConditionalFlagRegister cj, int offs) { assert(is_simm(offs, 21), "not a signed 21-bit int"); emit_int32(insn_IRI(bccondz_op, offs, ( (0b01<<3) | (int)cj->encoding()))); } + + void jirl(Register rd, Register rj, int offs) { assert(is_simm(offs, 18) && ((offs & 3) == 0), 
"not a signed 18-bit int"); emit_int32(insn_I16RR(jirl_op, offs >> 2, (int)rj->encoding(), (int)rd->encoding())); } + + void b(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(b_op, offs)); } + void bl(int offs) { assert(is_simm(offs, 26), "not a signed 26-bit int"); emit_int32(insn_I26(bl_op, offs)); } + + + void beq(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(beq_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bne(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bne_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void blt(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(blt_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bge(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bge_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bltu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bltu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + void bgeu(Register rj, Register rd, int offs) { assert(is_simm(offs, 16), "not a signed 16-bit int"); emit_int32(insn_I16RR(bgeu_op, offs, (int)rj->encoding(), (int)rd->encoding())); } + + void beq (Register rj, Register rd, address entry) { beq (rj, rd, offset16(entry)); } + void bne (Register rj, Register rd, address entry) { bne (rj, rd, offset16(entry)); } + void blt (Register rj, Register rd, address entry) { blt (rj, rd, offset16(entry)); } + void bge (Register rj, Register rd, address entry) { bge (rj, rd, offset16(entry)); } + void bltu (Register rj, Register rd, address entry) { bltu (rj, rd, offset16(entry)); } + void bgeu (Register rj, Register rd, address entry) { bgeu (rj, rd, offset16(entry)); } + void beqz (Register rj, address entry) { beqz (rj, offset21(entry)); } + void bnez (Register rj, address entry) { bnez (rj, offset21(entry)); } + void b(address entry) { b(offset26(entry)); } + void bl(address entry) { bl(offset26(entry)); } + void bceqz(ConditionalFlagRegister cj, address entry) { bceqz(cj, offset21(entry)); } + void bcnez(ConditionalFlagRegister cj, address entry) { bcnez(cj, offset21(entry)); } + + void beq (Register rj, Register rd, Label& L) { beq (rj, rd, target(L)); } + void bne (Register rj, Register rd, Label& L) { bne (rj, rd, target(L)); } + void blt (Register rj, Register rd, Label& L) { blt (rj, rd, target(L)); } + void bge (Register rj, Register rd, Label& L) { bge (rj, rd, target(L)); } + void bltu (Register rj, Register rd, Label& L) { bltu (rj, rd, target(L)); } + void bgeu (Register rj, Register rd, Label& L) { bgeu (rj, rd, target(L)); } + void beqz (Register rj, Label& L) { beqz (rj, target(L)); } + void bnez (Register rj, Label& L) { bnez (rj, target(L)); } + void b(Label& L) { b(target(L)); } + void bl(Label& L) { bl(target(L)); } + void bceqz(ConditionalFlagRegister cj, Label& L) { bceqz(cj, target(L)); } + void bcnez(ConditionalFlagRegister cj, Label& L) { bcnez(cj, target(L)); } + + typedef enum { + // hint[4] + Completion = 0, + Ordering = (1 << 4), + + // The bitwise-not of the below constants is corresponding to the hint. This is convenient for OR operation. 
+ // hint[3:2] and hint[1:0] + LoadLoad = ((1 << 3) | (1 << 1)), + LoadStore = ((1 << 3) | (1 << 0)), + StoreLoad = ((1 << 2) | (1 << 1)), + StoreStore = ((1 << 2) | (1 << 0)), + AnyAny = ((3 << 2) | (3 << 0)), + } Membar_mask_bits; + + // Serializes memory and blows flags + void membar(Membar_mask_bits hint) { + assert((hint & (3 << 0)) != 0, "membar mask unsupported!"); + assert((hint & (3 << 2)) != 0, "membar mask unsupported!"); + dbar(Ordering | (~hint & 0xf)); + } + + // LSX and LASX +#define ASSERT_LSX assert(UseLSX, ""); +#define ASSERT_LASX assert(UseLASX, ""); + + void vadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vadd_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vadd_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvadd_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvadd_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsub_q(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsub_q_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { 
ASSERT_LASX emit_int32(insn_RRR(xvsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsub_q(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsub_q_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vaddi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vaddi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vaddi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vaddi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vaddi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void xvaddi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvaddi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvaddi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvaddi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvaddi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + + void vsubi_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsubi_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsubi_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsubi_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsubi_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void xvsubi_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_bu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsubi_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsubi_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsubi_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsubi_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + + void vneg_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_b_op, (int)vj->encoding(), (int)vd->encoding())); } + void vneg_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_h_op, (int)vj->encoding(), (int)vd->encoding())); } + void vneg_w(FloatRegister vd, 
FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_w_op, (int)vj->encoding(), (int)vd->encoding())); } + void vneg_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vneg_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvneg_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvneg_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvneg_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvneg_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvneg_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vabsd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vabsd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vabsd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vabsd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vabsd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvabsd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvabsd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvabsd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvabsd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvabsd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmax_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmax_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmax_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmax_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmax_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmax_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + 
void xvmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmin_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmin_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmin_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmin_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmin_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmin_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmul_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmul_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmul_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmul_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmul_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmul_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmuh_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_w_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmuh_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmuh_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmuh_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmuh_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmuh_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmuh_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmuh_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmulwev_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwev_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwev_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwev_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwev_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmulwev_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwev_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_w_h_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } + void xvmulwev_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwev_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwev_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmulwod_h_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_h_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwod_w_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_w_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwod_d_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_d_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmulwod_q_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmulwod_q_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmulwod_h_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_h_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwod_w_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_w_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwod_d_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_d_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmulwod_q_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmulwod_q_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmadd_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmadd_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmadd_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmadd_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmadd_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmadd_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vmsub_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_b_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmsub_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmsub_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vmsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvmsub_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmsub_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmsub_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvmsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vext2xv_h_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_h_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_w_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_d_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_w_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_w_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_d_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vext2xv_hu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_hu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_wu_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_bu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_du_bu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_bu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_wu_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_wu_hu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_du_hu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_hu_op, (int)xj->encoding(), (int)xd->encoding())); } + void vext2xv_du_wu(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(vext2xv_du_wu_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vldi(FloatRegister vd, int i13) { ASSERT_LSX emit_int32(insn_I13R( vldi_op, i13, (int)vd->encoding())); } + void xvldi(FloatRegister xd, int i13) { ASSERT_LASX emit_int32(insn_I13R(xvldi_op, i13, (int)xd->encoding())); } + + void vand_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vand_v_op, (int)vk->encoding(), 
(int)vj->encoding(), (int)vd->encoding())); } + void xvand_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvand_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vxor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vxor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvxor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvxor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vnor_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vnor_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvnor_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvnor_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vandn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vandn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvandn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvandn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vorn_v(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vorn_v_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvorn_v(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvorn_v_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vandi_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vandi_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvandi_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvandi_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vxori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vxori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvxori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvxori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vnori_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vnori_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void 
xvnori_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvnori_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vsll_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsll_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsll_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsll_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsll_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsll_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsll_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsll_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsll_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsll_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vslli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vslli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vslli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vslli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vslli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvslli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvslli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvslli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvslli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvslli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvslli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vsrl_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsrl_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsrl_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsrl_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsrl_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX 
emit_int32(insn_RRR( vsrl_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsrl_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsrl_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsrl_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsrl_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsrl_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsrli_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrli_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vsrli_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrli_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vsrli_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrli_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsrli_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrli_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvsrli_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrli_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrli_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrli_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrli_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrli_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrli_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrli_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vsra_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsra_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsra_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsra_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsra_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsra_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsra_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsra_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsra_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsra_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + 
void vsrai_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vsrai_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vsrai_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrai_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vsrai_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrai_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsrai_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrai_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvsrai_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvsrai_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrai_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvsrai_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrai_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvsrai_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvsrai_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvsrai_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vrotr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vrotr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vrotr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vrotr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vrotr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvrotr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvrotr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvrotr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvrotr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvrotr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vrotri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vrotri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vrotri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vrotri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vrotri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vrotri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vrotri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vrotri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvrotri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvrotri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void 
xvrotri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvrotri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvrotri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvrotri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvrotri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvrotri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vsrlni_b_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vsrlni_b_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vsrlni_h_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vsrlni_h_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vsrlni_w_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vsrlni_w_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void vsrlni_d_q(FloatRegister vd, FloatRegister vj, int ui7) { ASSERT_LSX emit_int32(insn_I7RR( vsrlni_d_q_op, ui7, (int)vj->encoding(), (int)vd->encoding())); } + + void vpcnt_b(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_b_op, (int)vj->encoding(), (int)vd->encoding())); } + void vpcnt_h(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_h_op, (int)vj->encoding(), (int)vd->encoding())); } + void vpcnt_w(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_w_op, (int)vj->encoding(), (int)vd->encoding())); } + void vpcnt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vpcnt_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvpcnt_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvpcnt_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvpcnt_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvpcnt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvpcnt_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vbitclr_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitclr_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitclr_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitclr_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitclr_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitclr_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclr_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclr_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_w_op, (int)xk->encoding(), 
(int)xj->encoding(), (int)xd->encoding())); } + void xvbitclr_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitclr_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitclri_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitclri_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vbitclri_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitclri_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vbitclri_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitclri_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vbitclri_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitclri_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvbitclri_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitclri_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclri_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitclri_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclri_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitclri_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitclri_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitclri_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vbitset_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitset_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitset_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitset_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitset_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitset_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitset_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitset_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitset_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitset_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitseti_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitseti_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vbitseti_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitseti_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vbitseti_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitseti_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void 
vbitseti_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitseti_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvbitseti_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitseti_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitseti_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitseti_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitseti_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitseti_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitseti_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX emit_int32(insn_I6RR(xvbitseti_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vbitrev_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitrev_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitrev_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vbitrev_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vbitrev_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitrev_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrev_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrev_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrev_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvbitrev_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitrevi_b(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vbitrevi_b_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vbitrevi_h(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vbitrevi_h_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vbitrevi_w(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vbitrevi_w_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vbitrevi_d(FloatRegister vd, FloatRegister vj, int ui6) { ASSERT_LSX emit_int32(insn_I6RR( vbitrevi_d_op, ui6, (int)vj->encoding(), (int)vd->encoding())); } + void xvbitrevi_b(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvbitrevi_b_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrevi_h(FloatRegister xd, FloatRegister xj, int ui4) { ASSERT_LASX emit_int32(insn_I4RR(xvbitrevi_h_op, ui4, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrevi_w(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvbitrevi_w_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvbitrevi_d(FloatRegister xd, FloatRegister xj, int ui6) { ASSERT_LASX 
emit_int32(insn_I6RR(xvbitrevi_d_op, ui6, (int)xj->encoding(), (int)xd->encoding())); } + + void vfadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfadd_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfadd_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfsub_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfsub_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmul_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmul_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmul_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmul_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmul_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmul_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfdiv_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfdiv_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfdiv_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfdiv_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfdiv_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfdiv_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, 
FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfnmadd_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfnmadd_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmadd_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfnmadd_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfnmadd_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmadd_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfnmsub_s(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_s_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfnmsub_d(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vfnmsub_d_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfnmsub_s(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_s_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfnmsub_d(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvfnmsub_d_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmax_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmax_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmax_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmax_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRR(xvfmax_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmax_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmax_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfmin_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_s_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfmin_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfmin_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfmin_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfmin_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfmin_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfclass_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfclass_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfclass_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfclass_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfclass_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfclass_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfsqrt_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfsqrt_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfsqrt_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfsqrt_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfsqrt_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfsqrt_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcvtl_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } + void vfcvtl_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvtl_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvfcvtl_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfcvtl_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvtl_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcvth_s_h(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_s_h_op, (int)rj->encoding(), (int)vd->encoding())); } + void vfcvth_d_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vfcvth_d_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvfcvth_s_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_s_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfcvth_d_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfcvth_d_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcvt_h_s(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_h_s_op, (int)vk->encoding(), (int)vj->encoding(), 
(int)vd->encoding())); } + void vfcvt_s_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vfcvt_s_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvfcvt_h_s(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_h_s_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcvt_s_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvfcvt_s_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrne_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrne_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrne_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrne_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrne_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrne_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrz_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrz_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrz_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrz_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrz_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrz_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrp_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrp_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrp_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrp_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrp_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrp_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrintrm_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrintrm_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrintrm_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrintrm_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrintrm_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrintrm_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vfrint_s(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_s_op, (int)vj->encoding(), (int)vd->encoding())); } + void vfrint_d(FloatRegister vd, FloatRegister vj) { ASSERT_LSX emit_int32(insn_RR( vfrint_d_op, (int)vj->encoding(), (int)vd->encoding())); } + void xvfrint_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvfrint_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvfrint_d_op, (int)xj->encoding(), 
(int)xd->encoding())); } + + void vftintrne_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrne_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrne_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrne_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrne_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrne_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrz_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrz_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrz_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrz_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrz_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrz_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrp_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrp_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrp_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrp_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrp_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrp_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrm_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftintrm_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrm_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrm_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftintrm_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrm_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftint_w_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_w_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void vftint_l_d(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftint_l_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftint_w_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_w_s_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvftint_l_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftint_l_d_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrne_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrne_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrne_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrne_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrz_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) 
{ ASSERT_LSX emit_int32(insn_RRR( vftintrz_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrz_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrz_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrp_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrp_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrp_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrp_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrm_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftintrm_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftintrm_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftintrm_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftint_w_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vftint_w_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvftint_w_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvftint_w_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrnel_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrnel_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrnel_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrnel_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrneh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrneh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrneh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrneh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrzl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrzl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrzh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrzh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrzh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrzh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrpl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrpl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrpl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrpl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrph_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrph_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrph_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrph_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrml_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrml_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrml_l_s(FloatRegister xd, 
FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrml_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintrmh_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintrmh_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintrmh_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintrmh_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftintl_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftintl_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftintl_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftintl_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vftinth_l_s(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vftinth_l_s_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvftinth_l_s(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvftinth_l_s_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vffint_s_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_s_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void vffint_d_l(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffint_d_l_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvffint_s_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_s_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvffint_d_l(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffint_d_l_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vffint_s_l(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vffint_s_l_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvffint_s_l(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvffint_s_l_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vffintl_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffintl_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvffintl_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffintl_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vffinth_d_w(FloatRegister vd, FloatRegister rj) { ASSERT_LSX emit_int32(insn_RR( vffinth_d_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvffinth_d_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvffinth_d_w_op, (int)xj->encoding(), (int)xd->encoding())); } + + void vseq_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vseq_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vseq_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vseq_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vseq_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvseq_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvseq_h(FloatRegister 
xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvseq_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvseq_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvseq_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsle_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_b_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsle_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vsle_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vsle_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vsle_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvsle_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvsle_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvsle_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslt_b(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_b_op, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvslt_b(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_b_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslt_bu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_bu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_hu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_hu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_wu(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_wu_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vslt_du(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vslt_du_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvslt_bu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_bu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_hu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_hu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_wu(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_wu_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvslt_du(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvslt_du_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vslti_bu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_bu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_hu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_hu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_wu(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_wu_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void vslti_du(FloatRegister vd, FloatRegister vj, int ui5) { ASSERT_LSX emit_int32(insn_I5RR( vslti_du_op, ui5, (int)vj->encoding(), (int)vd->encoding())); } + void xvslti_bu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_bu_op, 
ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_hu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_hu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_wu(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_wu_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + void xvslti_du(FloatRegister xd, FloatRegister xj, int ui5) { ASSERT_LASX emit_int32(insn_I5RR(xvslti_du_op, ui5, (int)xj->encoding(), (int)xd->encoding())); } + + void vfcmp_caf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_ceq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_clt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_saf_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sun_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_seq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sueq_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sueq, 
(int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_slt_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sult_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sle_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sule_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sne_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sor_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sune_s (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_s_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + + void vfcmp_caf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_caf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_ceq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_ceq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_clt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_clt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_cor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void 
vfcmp_cune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_cune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_saf_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_saf , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sun_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sun , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_seq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_seq , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sueq_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sueq, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_slt_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_slt , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sult_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sult, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sle_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sle , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sule_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sule, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sne_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sne , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sor_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sor , (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vfcmp_sune_d (FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRRR( vfcmp_cond_d_op, fcmp_sune, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + + void xvfcmp_caf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_ceq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_clt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) 
{ ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_saf_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sun_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_seq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sueq_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_slt_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sult_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sle_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sule_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sne_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sor_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sune_s (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_s_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void xvfcmp_caf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX 
emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_caf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_ceq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_ceq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_clt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_clt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_cune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_cune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_saf_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_saf , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sun_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sun , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_seq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_seq , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sueq_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sueq, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_slt_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_slt , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sult_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sult, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sle_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, 
fcmp_sle , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sule_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sule, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sne_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sne , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sor_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sor , (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvfcmp_sune_d (FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRRR(xvfcmp_cond_d_op, fcmp_sune, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vbitsel_v(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vbitsel_v_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvbitsel_v(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvbitsel_v_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vinsgr2vr_b(FloatRegister vd, Register rj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vinsgr2vr_b_op, ui4, (int)rj->encoding(), (int)vd->encoding())); } + void vinsgr2vr_h(FloatRegister vd, Register rj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vinsgr2vr_h_op, ui3, (int)rj->encoding(), (int)vd->encoding())); } + void vinsgr2vr_w(FloatRegister vd, Register rj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vinsgr2vr_w_op, ui2, (int)rj->encoding(), (int)vd->encoding())); } + void vinsgr2vr_d(FloatRegister vd, Register rj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vinsgr2vr_d_op, ui1, (int)rj->encoding(), (int)vd->encoding())); } + + void xvinsgr2vr_w(FloatRegister xd, Register rj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsgr2vr_w_op, ui3, (int)rj->encoding(), (int)xd->encoding())); } + void xvinsgr2vr_d(FloatRegister xd, Register rj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsgr2vr_d_op, ui2, (int)rj->encoding(), (int)xd->encoding())); } + + void vpickve2gr_b(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_b_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_h(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_h_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_w(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_w_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_d(Register rd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_d_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } + + void vpickve2gr_bu(Register rd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR( vpickve2gr_bu_op, ui4, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_hu(Register rd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR( vpickve2gr_hu_op, ui3, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_wu(Register rd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR( vpickve2gr_wu_op, ui2, (int)vj->encoding(), (int)rd->encoding())); } + void vpickve2gr_du(Register rd, FloatRegister vj, int ui1) { 
ASSERT_LSX emit_int32(insn_I1RR( vpickve2gr_du_op, ui1, (int)vj->encoding(), (int)rd->encoding())); } + + void xvpickve2gr_w(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_w_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } + void xvpickve2gr_d(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_d_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } + + void xvpickve2gr_wu(Register rd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve2gr_wu_op, ui3, (int)xj->encoding(), (int)rd->encoding())); } + void xvpickve2gr_du(Register rd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve2gr_du_op, ui2, (int)xj->encoding(), (int)rd->encoding())); } + + void vreplgr2vr_b(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_b_op, (int)rj->encoding(), (int)vd->encoding())); } + void vreplgr2vr_h(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_h_op, (int)rj->encoding(), (int)vd->encoding())); } + void vreplgr2vr_w(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_w_op, (int)rj->encoding(), (int)vd->encoding())); } + void vreplgr2vr_d(FloatRegister vd, Register rj) { ASSERT_LSX emit_int32(insn_RR( vreplgr2vr_d_op, (int)rj->encoding(), (int)vd->encoding())); } + void xvreplgr2vr_b(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_b_op, (int)rj->encoding(), (int)xd->encoding())); } + void xvreplgr2vr_h(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_h_op, (int)rj->encoding(), (int)xd->encoding())); } + void xvreplgr2vr_w(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_w_op, (int)rj->encoding(), (int)xd->encoding())); } + void xvreplgr2vr_d(FloatRegister xd, Register rj) { ASSERT_LASX emit_int32(insn_RR(xvreplgr2vr_d_op, (int)rj->encoding(), (int)xd->encoding())); } + + void vreplvei_b(FloatRegister vd, FloatRegister vj, int ui4) { ASSERT_LSX emit_int32(insn_I4RR(vreplvei_b_op, ui4, (int)vj->encoding(), (int)vd->encoding())); } + void vreplvei_h(FloatRegister vd, FloatRegister vj, int ui3) { ASSERT_LSX emit_int32(insn_I3RR(vreplvei_h_op, ui3, (int)vj->encoding(), (int)vd->encoding())); } + void vreplvei_w(FloatRegister vd, FloatRegister vj, int ui2) { ASSERT_LSX emit_int32(insn_I2RR(vreplvei_w_op, ui2, (int)vj->encoding(), (int)vd->encoding())); } + void vreplvei_d(FloatRegister vd, FloatRegister vj, int ui1) { ASSERT_LSX emit_int32(insn_I1RR(vreplvei_d_op, ui1, (int)vj->encoding(), (int)vd->encoding())); } + + void xvreplve0_b(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_b_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_h(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_h_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_w(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_w_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_d(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_d_op, (int)xj->encoding(), (int)xd->encoding())); } + void xvreplve0_q(FloatRegister xd, FloatRegister xj) { ASSERT_LASX emit_int32(insn_RR(xvreplve0_q_op, (int)xj->encoding(), (int)xd->encoding())); } + + void xvinsve0_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvinsve0_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void 
xvinsve0_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvinsve0_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } + + void xvpickve_w(FloatRegister xd, FloatRegister xj, int ui3) { ASSERT_LASX emit_int32(insn_I3RR(xvpickve_w_op, ui3, (int)xj->encoding(), (int)xd->encoding())); } + void xvpickve_d(FloatRegister xd, FloatRegister xj, int ui2) { ASSERT_LASX emit_int32(insn_I2RR(xvpickve_d_op, ui2, (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf_b(FloatRegister vd, FloatRegister vj, FloatRegister vk, FloatRegister va) { ASSERT_LSX emit_int32(insn_RRRR( vshuf_b_op, (int)va->encoding(), (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void xvshuf_b(FloatRegister xd, FloatRegister xj, FloatRegister xk, FloatRegister xa) { ASSERT_LASX emit_int32(insn_RRRR(xvshuf_b_op, (int)xa->encoding(), (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf_h(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_h_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vshuf_w(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_w_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + void vshuf_d(FloatRegister vd, FloatRegister vj, FloatRegister vk) { ASSERT_LSX emit_int32(insn_RRR( vshuf_d_op, (int)vk->encoding(), (int)vj->encoding(), (int)vd->encoding())); } + + void xvshuf_h(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_h_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf_d(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvshuf_d_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void xvperm_w(FloatRegister xd, FloatRegister xj, FloatRegister xk) { ASSERT_LASX emit_int32(insn_RRR(xvperm_w_op, (int)xk->encoding(), (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf4i_b(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_b_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void vshuf4i_h(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_h_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void vshuf4i_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvshuf4i_b(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_b_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf4i_h(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_h_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + void xvshuf4i_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not an unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vshuf4i_d(FloatRegister
vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vshuf4i_d_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvshuf4i_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvshuf4i_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vpermi_w(FloatRegister vd, FloatRegister vj, int ui8) { ASSERT_LSX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR( vpermi_w_op, ui8, (int)vj->encoding(), (int)vd->encoding())); } + void xvpermi_w(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_w_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void xvpermi_d(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_d_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void xvpermi_q(FloatRegister xd, FloatRegister xj, int ui8) { ASSERT_LASX assert(is_uimm(ui8, 8), "not a unsigned 8-bit int"); emit_int32(insn_I8RR(xvpermi_q_op, ui8, (int)xj->encoding(), (int)xd->encoding())); } + + void vld(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vld_op, si12, (int)rj->encoding(), (int)vd->encoding()));} + void xvld(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvld_op, si12, (int)rj->encoding(), (int)xd->encoding()));} + + void vst(FloatRegister vd, Register rj, int si12) { ASSERT_LSX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR( vst_op, si12, (int)rj->encoding(), (int)vd->encoding()));} + void xvst(FloatRegister xd, Register rj, int si12) { ASSERT_LASX assert(is_simm(si12, 12), "not a signed 12-bit int"); emit_int32(insn_I12RR(xvst_op, si12, (int)rj->encoding(), (int)xd->encoding()));} + + void vldx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } + void xvldx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvldx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } + + void vstx(FloatRegister vd, Register rj, Register rk) { ASSERT_LSX emit_int32(insn_RRR( vstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)vd->encoding())); } + void xvstx(FloatRegister xd, Register rj, Register rk) { ASSERT_LASX emit_int32(insn_RRR(xvstx_op, (int)rk->encoding(), (int)rj->encoding(), (int)xd->encoding())); } + +#undef ASSERT_LSX +#undef ASSERT_LASX + +public: + // Creation + Assembler(CodeBuffer* code) : AbstractAssembler(code) {} + + // Decoding + static address locate_operand(address inst, WhichOperand which); + static address locate_next_instruction(address inst); +}; + +#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp new file mode 100644 index 00000000000..601f4afe6fe --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/assembler_loongarch.inline.hpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp new file mode 100644 index 00000000000..32775e9bc39 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.hpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP + +// Platform specific for C++ based Interpreter +#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */ + +private: + + // save the bottom of the stack after frame manager setup. For ease of restoration after return + // from recursive interpreter call + intptr_t* _frame_bottom; /* saved bottom of frame manager frame */ + intptr_t* _last_Java_pc; /* pc to return to in frame manager */ + intptr_t* _sender_sp; /* sender's sp before stack (locals) extension */ + interpreterState _self_link; /* Previous interpreter state */ /* sometimes points to self??? 
*/ + double _native_fresult; /* save result of native calls that might return floats */ + intptr_t _native_lresult; /* save result of native calls that might return handle/longs */ +public: + + static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp); + inline intptr_t* sender_sp() { + return _sender_sp; + } + + +#define SET_LAST_JAVA_FRAME() + +#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set_flags(0); + +/* + * Macros for accessing the stack. + */ +#undef STACK_INT +#undef STACK_FLOAT +#undef STACK_ADDR +#undef STACK_OBJECT +#undef STACK_DOUBLE +#undef STACK_LONG + +// JavaStack Implementation + +#define GET_STACK_SLOT(offset) (*((intptr_t*) &topOfStack[-(offset)])) +#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)]) +#define STACK_ADDR(offset) (*((address *) &topOfStack[-(offset)])) +#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)])) +#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)])) +#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)])) +#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d) +#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l) + +#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value)) +#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value)) +#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value)) +#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \ + ((VMJavaVal64*)(addr))->l) +// JavaLocals implementation + +#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)]) +#define LOCALS_ADDR(offset) ((address)locals[-(offset)]) +#define LOCALS_INT(offset) (*((jint*)&locals[-(offset)])) +#define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)])) +#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)]) +#define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d) +#define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l) +#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)])) +#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)])) + +#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value)) +#define SET_LOCALS_ADDR(value, offset) (*((address *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_FLOAT(value, offset) (*((jfloat *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_OBJECT(value, offset) (*((oop *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value)) +#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value)) +#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) 
(((VMJavaVal64*)&locals[-((offset)+1)])->l = \ + ((VMJavaVal64*)(addr))->l) + +#endif // CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp new file mode 100644 index 00000000000..07df527e94e --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/bytecodeInterpreter_loongarch.inline.hpp @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP + +// Inline interpreter functions for LoongArch + +inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } +inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } +inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } +inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } +inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); } + +inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } + +inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? direction : 0); + +} + +inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { + // Copy 32 bits at a time; do not rely on a 64-bit unaligned access + to[0] = from[0]; to[1] = from[1]; +} + +// The long operations depend on compiler support for "long long" + +inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { + return op1 + op2; +} + +inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { + return op1 & op2; +} + +inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { + // QQQ what about check and throw...
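+  // Note: the shared interpreter loop is expected to have rejected a zero divisor (throwing ArithmeticException) before this helper is called, so no check is needed here.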
+ return op1 / op2; +} + +inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { + return op1 * op2; +} + +inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { + return op1 | op2; +} + +inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { + return op1 - op2; +} + +inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { + return op1 ^ op2; +} + +inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { + return op1 % op2; +} + +inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { + // CVM did this 0x3f mask, is the really needed??? QQQ + return ((unsigned long long) op1) >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { + return op1 >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { + return op1 << (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { + return -op; +} + +inline jlong BytecodeInterpreter::VMlongNot(jlong op) { + return ~op; +} + +inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { + return (op <= 0); +} + +inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { + return (op >= 0); +} + +inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { + return (op == 0); +} + +inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { + return (op1 == op2); +} + +inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { + return (op1 != op2); +} + +inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { + return (op1 >= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { + return (op1 <= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { + return (op1 < op2); +} + +inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { + return (op1 > op2); +} + +inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { + return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); +} + +// Long conversions + +inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { + return (jfloat) val; +} + +inline jint BytecodeInterpreter::VMlong2Int(jlong val) { + return (jint) val; +} + +// Double Arithmetic + +inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { + return op1 + op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { + // Divide by zero... QQQ + return op1 / op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { + return op1 * op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { + return -op; +} + +inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { + return fmod(op1, op2); +} + +inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { + return op1 - op2; +} + +inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? 
direction : 0); +} + +// Double Conversions + +inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { + return (jfloat) val; +} + +// Float Conversions + +inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { + return (jdouble) op; +} + +// Integer Arithmetic + +inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { + return op1 + op2; +} + +inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { + return op1 & op2; +} + +inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { + // it's possible we could catch this special case implicitly + if ((juint)op1 == 0x80000000 && op2 == -1) return op1; + else return op1 / op2; +} + +inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { + return op1 * op2; +} + +inline jint BytecodeInterpreter::VMintNeg(jint op) { + return -op; +} + +inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { + return op1 | op2; +} + +inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { + // it's possible we could catch this special case implicitly + if ((juint)op1 == 0x80000000 && op2 == -1) return 0; + else return op1 % op2; +} + +inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { + return op1 << op2; +} + +inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { + return op1 >> (op2 & 0x1f); // QQ op2 & 0x1f?? +} + +inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { + return op1 - op2; +} + +inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { + return ((juint) op1) >> (op2 & 0x1f); // QQ op2 & 0x1f?? +} + +inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { + return op1 ^ op2; +} + +inline jdouble BytecodeInterpreter::VMint2Double(jint val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMint2Float(jint val) { + return (jfloat) val; +} + +inline jlong BytecodeInterpreter::VMint2Long(jint val) { + return (jlong) val; +} + +inline jchar BytecodeInterpreter::VMint2Char(jint val) { + return (jchar) val; +} + +inline jshort BytecodeInterpreter::VMint2Short(jint val) { + return (jshort) val; +} + +inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { + return (jbyte) val; +} + +#endif // CPU_LOONGARCH_VM_BYTECODEINTERPRETER_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp new file mode 100644 index 00000000000..8641090584a --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/bytecodes.hpp" + + +void Bytecodes::pd_initialize() { + // No LoongArch specific initialization +} + + +Bytecodes::Code Bytecodes::pd_base_code_for(Code code) { + // No LoongArch specific bytecodes + return code; +} diff --git a/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp new file mode 100644 index 00000000000..fbdf5319960 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/bytecodes_loongarch.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP + +// No Loongson specific bytecodes + +#endif // CPU_LOONGARCH_VM_BYTECODES_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp new file mode 100644 index 00000000000..8f766a617e6 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/bytes_loongarch.hpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP + +#include "memory/allocation.hpp" + +class Bytes: AllStatic { + public: + // Returns true if the byte ordering used by Java is different from the native byte ordering + // of the underlying machine. For example, this is true for Intel x86, but false for Solaris + // on Sparc. + // we use LoongArch, so return true + static inline bool is_Java_byte_ordering_different(){ return true; } + + + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering + // (no special code is needed since LoongArch CPUs can access unaligned data) + static inline u2 get_native_u2(address p) { return *(u2*)p; } + static inline u4 get_native_u4(address p) { return *(u4*)p; } + static inline u8 get_native_u8(address p) { return *(u8*)p; } + + static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; } + static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; } + static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; } + + + // Efficient reading and writing of unaligned unsigned data in Java + // byte ordering (i.e. big-endian ordering). Byte-order reversal is + // needed since LoongArch64 CPUs use little-endian format. + static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } + + + // Efficient swapping of byte ordering + static inline u2 swap_u2(u2 x); // compiler-dependent implementation + static inline u4 swap_u4(u4 x); // compiler-dependent implementation + static inline u8 swap_u8(u8 x); +}; + + +// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "bytes_linux_loongarch.inline.hpp" +#endif + +#endif // CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp new file mode 100644 index 00000000000..5166acfa2b5 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_CodeStubs_loongarch_64.cpp @@ -0,0 +1,387 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/javaClasses.hpp" +#include "nativeInst_loongarch.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_loongarch.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#endif + +#define A0 RA0 +#define A3 RA3 + +#define __ ce->masm()-> + +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_method->as_register(), 1); + ce->store_parameter(_bci, 0); + __ call(Runtime1::entry_for(Runtime1::counter_overflow_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, + bool throw_index_out_of_bounds_exception) + : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception) + , _index(index) +{ + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ call(a, relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); + return; + } + + if (_index->is_cpu_register()) { + __ move(SCR1, _index->as_register()); + } else { + __ li(SCR1, _index->as_jint()); + } + Runtime1::StubID stub_id; + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { + stub_id = Runtime1::throw_range_check_failed_id; + } + __ call(Runtime1::entry_for(stub_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + +void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ call(a, relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } + __ bind(_entry); + __ call(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); +#ifdef ASSERT + __ should_not_reach_here(); +#endif +} + +// Implementation of NewInstanceStub + +NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, + CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); + assert(stub_id == Runtime1::new_instance_id || + stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + +void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ 
rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ move(A3, _klass_reg->as_register()); + __ call(Runtime1::entry_for(_stub_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == A0, "result must be in A0"); + __ b(_continuation); +} + +// Implementation of NewTypeArrayStub + +NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + +void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == S0, "length must be in S0"); + assert(_klass_reg->as_register() == A3, "klass_reg must be in A3"); + __ call(Runtime1::entry_for(Runtime1::new_type_array_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == A0, "result must be in A0"); + __ b(_continuation); +} + +// Implementation of NewObjectArrayStub + +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, + CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + +void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == S0, "length must be in S0"); + assert(_klass_reg->as_register() == A3, "klass_reg must be in A3"); + __ call(Runtime1::entry_for(Runtime1::new_object_array_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == A0, "result must be in A0"); + __ b(_continuation); +} + +// Implementation of MonitorAccessStubs + +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) + : MonitorAccessStub(obj_reg, lock_reg) { + _info = new CodeEmitInfo(info); +} + +void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); + ce->store_parameter(_lock_reg->as_register(), 0); + Runtime1::StubID enter_id; + if (ce->compilation()->has_fpu_code()) { + enter_id = Runtime1::monitorenter_id; + } else { + enter_id = Runtime1::monitorenter_nofpu_id; + } + __ call(Runtime1::entry_for(enter_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it + ce->monitor_address(_monitor_ix, _lock_reg); + } + ce->store_parameter(_lock_reg->as_register(), 0); + // note: non-blocking leaf routine => no call info needed + Runtime1::StubID exit_id; + if (ce->compilation()->has_fpu_code()) { + exit_id = Runtime1::monitorexit_id; + } else { + exit_id = Runtime1::monitorexit_nofpu_id; + } + __ lipc(RA, _continuation); + __ jmp(Runtime1::entry_for(exit_id), relocInfo::runtime_call_type); +} + +// Implementation of patching: +// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) +// - Replace original code with a call to the stub +// At Runtime: +// - call to stub, jump to runtime +// - in runtime: preserve all registers (especially
objects, i.e., source and destination object) +// - in runtime: after initializing class, restore original code, reexecute instruction + +int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; + +void PatchingStub::align_patch_site(MacroAssembler* masm) { +} + +void PatchingStub::emit_code(LIR_Assembler* ce) { + assert(false, "LoongArch64 should not use C1 runtime patching"); +} + +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + __ call(Runtime1::entry_for(Runtime1::deoptimize_id), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + DEBUG_ONLY(__ should_not_reach_here()); +} + +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. + a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + } else { + a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); + } + + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + __ bind(_entry); + __ call(a, relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); + // pass the object in a scratch register because all other registers + // must be preserved + if (_obj->is_cpu_register()) { + __ move(SCR1, _obj->as_register()); + } + __ call(Runtime1::entry_for(_stub), relocInfo::runtime_call_type); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + //---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. + // + VMRegPair args[5]; + BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; + SharedRuntime::java_calling_convention(signature, args, 5, true); + + // push parameters + // (src, src_pos, dest, destPos, length) + Register r[5]; + r[0] = src()->as_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int i = 0; i < 5 ; i++ ) { + VMReg r_1 = args[i].first(); + if (r_1->is_stack()) { + int st_off = r_1->reg2stack() * wordSize; + __ stptr_d (r[i], SP, st_off); + } else { + assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); + } + } + + ce->align_call(lir_static_call); + + ce->emit_static_call_stub(); + if (ce->compilation()->bailed_out()) { + return; // CodeCache is full + } + AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), + relocInfo::static_call_type); + address call = __ trampoline_call(resolve); + if (call == NULL) { + ce->bailout("trampoline stub overflow"); + return; + } + ce->add_call_info_here(info()); + +#ifndef PRODUCT + __ li(SCR2, (address)&Runtime1::_arraycopy_slowcase_cnt); + __ increment(Address(SCR2)); +#endif + + __ b(_continuation); +} + +///////////////////////////////////////////////////////////////////////////// +#if INCLUDE_ALL_GCS + +void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { + // At this point we know that marking is in progress. 
+ // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. + + __ bind(_entry); + assert(pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = pre_val()->as_register(); + + if (do_load()) { + ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/); + } + __ beqz(pre_val_reg, _continuation); + ce->store_parameter(pre_val()->as_register(), 0); + __ call(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id), relocInfo::runtime_call_type); + __ b(_continuation); +} + +jbyte* G1PostBarrierStub::_byte_map_base = NULL; + +jbyte* G1PostBarrierStub::byte_map_base_slow() { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->is_a(BarrierSet::G1SATBCTLogging), + "Must be if we're using this."); + return ((G1SATBCardTableModRefBS*)bs)->byte_map_base; +} + + +void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + assert(addr()->is_register(), "Precondition."); + assert(new_val()->is_register(), "Precondition."); + Register new_val_reg = new_val()->as_register(); + __ beqz(new_val_reg, _continuation); + ce->store_parameter(addr()->as_pointer_register(), 0); + __ call(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id), relocInfo::runtime_call_type); + __ b(_continuation); +} + +#endif // INCLUDE_ALL_GCS +///////////////////////////////////////////////////////////////////////////// + +#undef __ diff --git a/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp new file mode 100644 index 00000000000..1140e44431d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_Defs_loongarch.hpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP + +// native word offsets from memory address (little endian) +enum { + pd_lo_word_offset_in_bytes = 0, + pd_hi_word_offset_in_bytes = BytesPerWord +}; + +// explicit rounding operations are required to implement the strictFP mode +enum { + pd_strict_fp_requires_explicit_rounding = false +}; + +// FIXME: There are no callee-saved + +// registers +enum { + pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission + pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission + + pd_nof_caller_save_cpu_regs_frame_map = 15, // number of registers killed by calls + pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls + + pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, + pd_last_callee_saved_reg = 21, + + pd_last_allocatable_cpu_reg = pd_nof_caller_save_cpu_regs_frame_map - 1, + + pd_nof_cpu_regs_reg_alloc = pd_nof_caller_save_cpu_regs_frame_map, // number of registers that are visible to register allocator + pd_nof_fpu_regs_reg_alloc = 32, // number of registers that are visible to register allocator + + pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan + pd_nof_xmm_regs_linearscan = 0, // don't have vector registers + pd_first_cpu_reg = 0, + pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1, + pd_first_byte_reg = 0, + pd_last_byte_reg = pd_nof_cpu_regs_reg_alloc - 1, + pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, + pd_last_fpu_reg = pd_first_fpu_reg + 31, + + pd_first_callee_saved_fpu_reg = 24 + pd_first_fpu_reg, + pd_last_callee_saved_fpu_reg = 31 + pd_first_fpu_reg, +}; + +// Encoding of float value in debug info. This is true on x86 where +// floats are extended to doubles when stored in the stack, false for +// LoongArch64 where floats and doubles are stored in their native form. +enum { + pd_float_saved_as_double = false +}; + +#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp new file mode 100644 index 00000000000..bd8578c72a8 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP + +// No FPU stack on LoongArch +class FpuStackSim; + +#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp new file mode 100644 index 00000000000..1a89c437a83 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_FpuStackSim_loongarch_64.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +//-------------------------------------------------------- +// FpuStackSim +//-------------------------------------------------------- + +// No FPU stack on LoongArch64 +#include "precompiled.hpp" diff --git a/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp new file mode 100644 index 00000000000..4f0cf053617 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch.hpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP + +// On LoongArch64 the frame looks as follows: +// +// +-----------------------------+---------+----------------------------------------+----------------+----------- +// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . +// +-----------------------------+---------+----------------------------------------+----------------+----------- + + public: + static const int pd_c_runtime_reserved_arg_size; + + enum { + first_available_sp_in_frame = 0, + frame_pad_in_bytes = 16, + nof_reg_args = 8 + }; + + public: + static LIR_Opr receiver_opr; + + static LIR_Opr r0_opr; + static LIR_Opr ra_opr; + static LIR_Opr tp_opr; + static LIR_Opr sp_opr; + static LIR_Opr a0_opr; + static LIR_Opr a1_opr; + static LIR_Opr a2_opr; + static LIR_Opr a3_opr; + static LIR_Opr a4_opr; + static LIR_Opr a5_opr; + static LIR_Opr a6_opr; + static LIR_Opr a7_opr; + static LIR_Opr t0_opr; + static LIR_Opr t1_opr; + static LIR_Opr t2_opr; + static LIR_Opr t3_opr; + static LIR_Opr t4_opr; + static LIR_Opr t5_opr; + static LIR_Opr t6_opr; + static LIR_Opr t7_opr; + static LIR_Opr t8_opr; + static LIR_Opr rx_opr; + static LIR_Opr fp_opr; + static LIR_Opr s0_opr; + static LIR_Opr s1_opr; + static LIR_Opr s2_opr; + static LIR_Opr s3_opr; + static LIR_Opr s4_opr; + static LIR_Opr s5_opr; + static LIR_Opr s6_opr; + static LIR_Opr s7_opr; + static LIR_Opr s8_opr; + + static LIR_Opr ra_oop_opr; + static LIR_Opr a0_oop_opr; + static LIR_Opr a1_oop_opr; + static LIR_Opr a2_oop_opr; + static LIR_Opr a3_oop_opr; + static LIR_Opr a4_oop_opr; + static LIR_Opr a5_oop_opr; + static LIR_Opr a6_oop_opr; + static LIR_Opr a7_oop_opr; + static LIR_Opr t0_oop_opr; + static LIR_Opr t1_oop_opr; + static LIR_Opr t2_oop_opr; + static LIR_Opr t3_oop_opr; + static LIR_Opr t4_oop_opr; + static LIR_Opr t5_oop_opr; + static LIR_Opr t6_oop_opr; + static LIR_Opr t7_oop_opr; + static LIR_Opr t8_oop_opr; + static LIR_Opr fp_oop_opr; + static LIR_Opr s0_oop_opr; + static LIR_Opr s1_oop_opr; + static LIR_Opr s2_oop_opr; + static LIR_Opr s3_oop_opr; + static LIR_Opr s4_oop_opr; + static LIR_Opr s5_oop_opr; + static LIR_Opr s6_oop_opr; + static LIR_Opr s7_oop_opr; + static LIR_Opr s8_oop_opr; + + static LIR_Opr scr1_opr; + static LIR_Opr scr2_opr; + static LIR_Opr scr1_long_opr; + static LIR_Opr scr2_long_opr; + + static LIR_Opr a0_metadata_opr; + static LIR_Opr a1_metadata_opr; + static LIR_Opr a2_metadata_opr; + static LIR_Opr a3_metadata_opr; + static LIR_Opr a4_metadata_opr; + static LIR_Opr a5_metadata_opr; + + static LIR_Opr long0_opr; + static LIR_Opr long1_opr; + static LIR_Opr fpu0_float_opr; + static LIR_Opr fpu0_double_opr; + + static LIR_Opr as_long_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + static LIR_Opr as_pointer_opr(Register r) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); + } + + // VMReg name for spilled physical FPU stack slot n + static VMReg fpu_regname (int n); + + static bool is_caller_save_register(LIR_Opr opr) { return true; } + static bool is_caller_save_register(Register r) { return true; } + + static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } + static int last_cpu_reg() { return pd_last_cpu_reg; } + static int last_byte_reg() { return pd_last_byte_reg; } + +#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP diff --git 
a/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp new file mode 100644 index 00000000000..25c90bcf98f --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_FrameMap_loongarch_64.cpp @@ -0,0 +1,362 @@ +/* + * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_loongarch.inline.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); + if (r_1->is_stack()) { + // Convert stack slot to an SP offset + // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value + // so we must add it in here. 
+ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); + } else if (r_1->is_Register()) { + Register reg = r_1->as_Register(); + if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { + Register reg2 = r_2->as_Register(); + assert(reg2 == reg, "must be same register"); + opr = as_long_opr(reg); + } else if (is_reference_type(type)) { + opr = as_oop_opr(reg); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg); + } else if (type == T_ADDRESS) { + opr = as_address_opr(reg); + } else { + opr = as_opr(reg); + } + } else if (r_1->is_FloatRegister()) { + assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); + int num = r_1->as_FloatRegister()->encoding(); + if (type == T_FLOAT) { + opr = LIR_OprFact::single_fpu(num); + } else { + opr = LIR_OprFact::double_fpu(num); + } + } else { + ShouldNotReachHere(); + } + return opr; +} + +LIR_Opr FrameMap::r0_opr; +LIR_Opr FrameMap::ra_opr; +LIR_Opr FrameMap::tp_opr; +LIR_Opr FrameMap::sp_opr; +LIR_Opr FrameMap::a0_opr; +LIR_Opr FrameMap::a1_opr; +LIR_Opr FrameMap::a2_opr; +LIR_Opr FrameMap::a3_opr; +LIR_Opr FrameMap::a4_opr; +LIR_Opr FrameMap::a5_opr; +LIR_Opr FrameMap::a6_opr; +LIR_Opr FrameMap::a7_opr; +LIR_Opr FrameMap::t0_opr; +LIR_Opr FrameMap::t1_opr; +LIR_Opr FrameMap::t2_opr; +LIR_Opr FrameMap::t3_opr; +LIR_Opr FrameMap::t4_opr; +LIR_Opr FrameMap::t5_opr; +LIR_Opr FrameMap::t6_opr; +LIR_Opr FrameMap::t7_opr; +LIR_Opr FrameMap::t8_opr; +LIR_Opr FrameMap::rx_opr; +LIR_Opr FrameMap::fp_opr; +LIR_Opr FrameMap::s0_opr; +LIR_Opr FrameMap::s1_opr; +LIR_Opr FrameMap::s2_opr; +LIR_Opr FrameMap::s3_opr; +LIR_Opr FrameMap::s4_opr; +LIR_Opr FrameMap::s5_opr; +LIR_Opr FrameMap::s6_opr; +LIR_Opr FrameMap::s7_opr; +LIR_Opr FrameMap::s8_opr; + +LIR_Opr FrameMap::receiver_opr; + +LIR_Opr FrameMap::ra_oop_opr; +LIR_Opr FrameMap::a0_oop_opr; +LIR_Opr FrameMap::a1_oop_opr; +LIR_Opr FrameMap::a2_oop_opr; +LIR_Opr FrameMap::a3_oop_opr; +LIR_Opr FrameMap::a4_oop_opr; +LIR_Opr FrameMap::a5_oop_opr; +LIR_Opr FrameMap::a6_oop_opr; +LIR_Opr FrameMap::a7_oop_opr; +LIR_Opr FrameMap::t0_oop_opr; +LIR_Opr FrameMap::t1_oop_opr; +LIR_Opr FrameMap::t2_oop_opr; +LIR_Opr FrameMap::t3_oop_opr; +LIR_Opr FrameMap::t4_oop_opr; +LIR_Opr FrameMap::t5_oop_opr; +LIR_Opr FrameMap::t6_oop_opr; +LIR_Opr FrameMap::t7_oop_opr; +LIR_Opr FrameMap::t8_oop_opr; +LIR_Opr FrameMap::fp_oop_opr; +LIR_Opr FrameMap::s0_oop_opr; +LIR_Opr FrameMap::s1_oop_opr; +LIR_Opr FrameMap::s2_oop_opr; +LIR_Opr FrameMap::s3_oop_opr; +LIR_Opr FrameMap::s4_oop_opr; +LIR_Opr FrameMap::s5_oop_opr; +LIR_Opr FrameMap::s6_oop_opr; +LIR_Opr FrameMap::s7_oop_opr; +LIR_Opr FrameMap::s8_oop_opr; + +LIR_Opr FrameMap::scr1_opr; +LIR_Opr FrameMap::scr2_opr; +LIR_Opr FrameMap::scr1_long_opr; +LIR_Opr FrameMap::scr2_long_opr; + +LIR_Opr FrameMap::a0_metadata_opr; +LIR_Opr FrameMap::a1_metadata_opr; +LIR_Opr FrameMap::a2_metadata_opr; +LIR_Opr FrameMap::a3_metadata_opr; +LIR_Opr FrameMap::a4_metadata_opr; +LIR_Opr FrameMap::a5_metadata_opr; + +LIR_Opr FrameMap::long0_opr; +LIR_Opr FrameMap::long1_opr; +LIR_Opr FrameMap::fpu0_float_opr; +LIR_Opr FrameMap::fpu0_double_opr; + +LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0 }; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0 }; + +//-------------------------------------------------------- +// FrameMap +//-------------------------------------------------------- + +void FrameMap::initialize() { + assert(!_init_done, "once"); + int i = 0; + + // 
caller save register + map_register(i, A0); a0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A1); a1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A2); a2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A3); a3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A4); a4_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A5); a5_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A6); a6_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, A7); a7_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T0); t0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T1); t1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T2); t2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T3); t3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T5); t5_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T6); t6_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, T8); t8_opr = LIR_OprFact::single_cpu(i); i++; + + // callee save register + map_register(i, S0); s0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S1); s1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S2); s2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S3); s3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S4); s4_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S7); s7_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, S8); s8_opr = LIR_OprFact::single_cpu(i); i++; + + // special register + map_register(i, S5); s5_opr = LIR_OprFact::single_cpu(i); i++; // heapbase + map_register(i, S6); s6_opr = LIR_OprFact::single_cpu(i); i++; // thread + map_register(i, TP); tp_opr = LIR_OprFact::single_cpu(i); i++; // tp + map_register(i, FP); fp_opr = LIR_OprFact::single_cpu(i); i++; // fp + map_register(i, RA); ra_opr = LIR_OprFact::single_cpu(i); i++; // ra + map_register(i, SP); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp + + // tmp register + map_register(i, T7); t7_opr = LIR_OprFact::single_cpu(i); i++; // scr1 + map_register(i, T4); t4_opr = LIR_OprFact::single_cpu(i); i++; // scr2 + + scr1_opr = t7_opr; + scr2_opr = t4_opr; + scr1_long_opr = LIR_OprFact::double_cpu(t7_opr->cpu_regnr(), t7_opr->cpu_regnr()); + scr2_long_opr = LIR_OprFact::double_cpu(t4_opr->cpu_regnr(), t4_opr->cpu_regnr()); + + long0_opr = LIR_OprFact::double_cpu(a0_opr->cpu_regnr(), a0_opr->cpu_regnr()); + long1_opr = LIR_OprFact::double_cpu(a1_opr->cpu_regnr(), a1_opr->cpu_regnr()); + + fpu0_float_opr = LIR_OprFact::single_fpu(0); + fpu0_double_opr = LIR_OprFact::double_fpu(0); + + // scr1, scr2 not included + _caller_save_cpu_regs[0] = a0_opr; + _caller_save_cpu_regs[1] = a1_opr; + _caller_save_cpu_regs[2] = a2_opr; + _caller_save_cpu_regs[3] = a3_opr; + _caller_save_cpu_regs[4] = a4_opr; + _caller_save_cpu_regs[5] = a5_opr; + _caller_save_cpu_regs[6] = a6_opr; + _caller_save_cpu_regs[7] = a7_opr; + _caller_save_cpu_regs[8] = t0_opr; + _caller_save_cpu_regs[9] = t1_opr; + _caller_save_cpu_regs[10] = t2_opr; + _caller_save_cpu_regs[11] = t3_opr; + _caller_save_cpu_regs[12] = t5_opr; + _caller_save_cpu_regs[13] = t6_opr; + _caller_save_cpu_regs[14] = t8_opr; + + for (int i = 0; i < 8; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } + + _init_done = true; + + ra_oop_opr = as_oop_opr(RA); + a0_oop_opr = as_oop_opr(A0); + a1_oop_opr = as_oop_opr(A1); + a2_oop_opr = as_oop_opr(A2); + a3_oop_opr = as_oop_opr(A3); + a4_oop_opr = as_oop_opr(A4); + a5_oop_opr = as_oop_opr(A5); + a6_oop_opr = as_oop_opr(A6); + a7_oop_opr = 
as_oop_opr(A7); + t0_oop_opr = as_oop_opr(T0); + t1_oop_opr = as_oop_opr(T1); + t2_oop_opr = as_oop_opr(T2); + t3_oop_opr = as_oop_opr(T3); + t4_oop_opr = as_oop_opr(T4); + t5_oop_opr = as_oop_opr(T5); + t6_oop_opr = as_oop_opr(T6); + t7_oop_opr = as_oop_opr(T7); + t8_oop_opr = as_oop_opr(T8); + fp_oop_opr = as_oop_opr(FP); + s0_oop_opr = as_oop_opr(S0); + s1_oop_opr = as_oop_opr(S1); + s2_oop_opr = as_oop_opr(S2); + s3_oop_opr = as_oop_opr(S3); + s4_oop_opr = as_oop_opr(S4); + s5_oop_opr = as_oop_opr(S5); + s6_oop_opr = as_oop_opr(S6); + s7_oop_opr = as_oop_opr(S7); + s8_oop_opr = as_oop_opr(S8); + + a0_metadata_opr = as_metadata_opr(A0); + a1_metadata_opr = as_metadata_opr(A1); + a2_metadata_opr = as_metadata_opr(A2); + a3_metadata_opr = as_metadata_opr(A3); + a4_metadata_opr = as_metadata_opr(A4); + a5_metadata_opr = as_metadata_opr(A5); + + sp_opr = as_pointer_opr(SP); + fp_opr = as_pointer_opr(FP); + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; + SharedRuntime::java_calling_convention(&sig_bt, &regs, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + for (int i = 0; i < nof_caller_save_fpu_regs; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } +} + +Address FrameMap::make_new_address(ByteSize sp_offset) const { + // for sp, based address use this: + // return Address(sp, in_bytes(sp_offset) - (framesize() - 2) * 4); + return Address(SP, in_bytes(sp_offset)); +} + +// ----------------mapping----------------------- +// all mapping is based on fp addressing, except for simple leaf methods where we access +// the locals sp based (and no frame is built) + +// Frame for simple leaf methods (quick entries) +// +// +----------+ +// | ret addr | <- TOS +// +----------+ +// | args | +// | ...... | + +// Frame for standard methods +// +// | .........| <- TOS +// | locals | +// +----------+ +// | old fp, | <- RFP +// +----------+ +// | ret addr | +// +----------+ +// | args | +// | .........| + +// For OopMaps, map a local variable or spill index to a VMRegImpl name. +// This is the offset from sp() in the frame of the slot for the index, +// skewed by VMRegImpl::stack0 to indicate a stack location (vs. a register). +// +// framesize + +// stack0 stack0 0 <- VMReg +// | | | +// ...........|..............|.............| +// 0 1 2 3 x x 4 5 6 ... | <- local indices +// ^ ^ sp() ( x x indicate link +// | | and return addr) +// arguments non-argument locals + +VMReg FrameMap::fpu_regname(int n) { + // Return the OptoReg name for the fpu stack slot "n" + // A spilled fpu stack slot comprises two single-word OptoReg's. + return as_FloatRegister(n)->as_VMReg(); +} + +LIR_Opr FrameMap::stack_pointer() { + return FrameMap::sp_opr; +} + +// JSR 292 +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { + return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 +} + +bool FrameMap::validate_frame() { + return true; +} diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp new file mode 100644 index 00000000000..38b0daa0257 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch.hpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP + +// ArrayCopyStub needs access to bailout +friend class ArrayCopyStub; + + private: + int array_element_size(BasicType type) const; + + void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, + int dest_index, bool pop_fpu_stack); + + // helper functions which check for overflow and set bailout if it + // occurs. They always return a valid embeddable pointer, but in the + // bailout case the pointer won't be to unique storage. + address float_constant(float f); + address double_constant(double d); + + address int_constant(jlong n); + + bool is_literal_address(LIR_Address* addr); + + // Ensure we have a valid Address (base+offset) to a stack-slot. + Address stack_slot_address(int index, uint shift, int adjust = 0); + + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done); + void add_debug_info_for_branch(address adr, CodeEmitInfo* info); + + void casw(Register addr, Register newval, Register cmpval, bool sign); + void casl(Register addr, Register newval, Register cmpval); + + void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); + + static const int max_tableswitches = 20; + struct tableswitch switches[max_tableswitches]; + int tableswitch_count; + + void init() { tableswitch_count = 0; } + + void deoptimize_trap(CodeEmitInfo *info); + +public: + void store_parameter(Register r, int offset_from_sp_in_words); + void store_parameter(jint c, int offset_from_sp_in_words); + void store_parameter(jobject c, int offset_from_sp_in_words); + + enum { + // call stub: CompiledStaticCall::to_interp_stub_size() + + // NativeInstruction::nop_instruction_size + + // NativeCallTrampolineStub::instruction_size + call_stub_size = 13 * NativeInstruction::nop_instruction_size, + exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), + deopt_handler_size = 7 * NativeInstruction::nop_instruction_size + }; + +#endif // CPU_LOONGARCH_C1_LIRASSEMBLER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp new file mode 100644 index 00000000000..ee48326becf --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_LIRAssembler_loongarch_64.cpp @@ -0,0 +1,3377 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "code/compiledIC.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_loongarch.inline.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +NEEDS_CLEANUP // remove this definitions? 
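A note on the local aliases defined above: the assembler headers in this port define the integer argument and temporary registers under prefixed names (RA0..RA7, RT0..RT8), and the #define block maps them back to the conventional ABI names (A0..A7, T0..T8) used throughout this file. The `__` shorthand introduced just below expands to `_masm->`, so, roughly (illustrative snippet, not part of the patch):

    COMMENT("inline cache check");   // emits an assembler block comment in non-PRODUCT builds only
    __ load_klass(SCR2, receiver);   // same as _masm->load_klass(SCR2, receiver)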
+ +#define __ _masm-> + +static void select_different_registers(Register preserve, Register extra, + Register &tmp1, Register &tmp2) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp2 = extra; + } + assert_different_registers(preserve, tmp1, tmp2); +} + +static void select_different_registers(Register preserve, Register extra, + Register &tmp1, Register &tmp2, + Register &tmp3) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp2 = extra; + } else if (tmp3 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp3 = extra; + } + assert_different_registers(preserve, tmp1, tmp2, tmp3); +} + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + +//--------------fpu register translations----------------------- + +address LIR_Assembler::float_constant(float f) { + address const_addr = __ float_constant(f); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +address LIR_Assembler::double_constant(double d) { + address const_addr = __ double_constant(d); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } + +void LIR_Assembler::reset_FPU() { Unimplemented(); } + +void LIR_Assembler::fpop() { Unimplemented(); } + +void LIR_Assembler::fxch(int i) { Unimplemented(); } + +void LIR_Assembler::fld(int i) { Unimplemented(); } + +void LIR_Assembler::ffree(int i) { Unimplemented(); } + +void LIR_Assembler::breakpoint() { Unimplemented(); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } + +bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } + +static Register as_reg(LIR_Opr op) { + return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); +} + +static jlong as_long(LIR_Opr data) { + jlong result; + switch (data->type()) { + case T_INT: + result = (data->as_jint()); + break; + case T_LONG: + result = (data->as_jlong()); + break; + default: + ShouldNotReachHere(); + result = 0; // unreachable + } + return result; +} + +Address LIR_Assembler::as_Address(LIR_Address* addr) { + Register base = addr->base()->as_pointer_register(); + LIR_Opr opr = addr->index(); + if (opr->is_cpu_register()) { + Register index; + if (opr->is_single_cpu()) + index = opr->as_register(); + else + index = opr->as_register_lo(); + assert(addr->disp() == 0, "must be"); + return Address(base, index, Address::ScaleFactor(addr->scale())); + } else { + assert(addr->scale() == 0, "must be"); + return Address(base, addr->disp()); + } + return Address(); +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + ShouldNotReachHere(); + return Address(); +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + return as_Address(addr); // Ouch + // FIXME: This needs to be much more clever. See x86. 
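The as_Address() translation above produces one of two machine address forms, depending on whether the LIR address carries an index register; roughly (operand names here are illustrative only):

    // LIR_Address(base, index = T0, scale, disp = 0)    ->  Address(base, T0, ScaleFactor(scale))
    // LIR_Address(base, index = illegal, disp = 16)     ->  Address(base, 16)

A register index combined with a non-zero displacement is rejected by the asserts; only pure base+index or base+displacement accesses are handled here.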
+} + +// Ensure a valid Address (base + offset) to a stack-slot. If stack access is +// not encodable as a base + (immediate) offset, generate an explicit address +// calculation to hold the address in a temporary register. +Address LIR_Assembler::stack_slot_address(int index, uint size, int adjust) { + precond(size == 4 || size == 8); + Address addr = frame_map()->address_for_slot(index, adjust); + precond(addr.index() == noreg); + precond(addr.base() == SP); + precond(addr.disp() > 0); + uint mask = size - 1; + assert((addr.disp() & mask) == 0, "scaled offsets only"); + return addr; +} + +void LIR_Assembler::osr_entry() { + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + ValueStack* entry_state = osr_entry->state(); + int number_of_locks = entry_state->locks_size(); + + // we jump here if osr happens with the interpreter + // state set up to continue at the beginning of the + // loop that triggered osr - in particular, we have + // the following registers setup: + // + // A2: osr buffer + // + + // build frame + ciMethod* m = compilation()->method(); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + + // OSR buffer is + // + // locals[nlocals-1..0] + // monitors[0..number_of_locks] + // + // locals is a direct copy of the interpreter frame so in the osr buffer + // so first slot in the local array is the last local from the interpreter + // and last slot is local[0] (receiver) from the interpreter + // + // Similarly with locks. The first lock slot in the osr buffer is the nth lock + // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock + // in the interpreter frame (the method lock if a sync method) + + // Initialize monitors in the compiled activation. + // A2: pointer to osr buffer + // + // All other registers are dead at this point and the locals will be + // copied into place by code emitted in the IR. + + Register OSR_buf = osrBufferPointer()->as_pointer_register(); + { + assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = BytesPerWord * method()->max_locals() + (2 * BytesPerWord) * (number_of_locks - 1); + // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in + // the OSR buffer using 2 word entries: first the lock and then + // the oop. + for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); +#ifdef ASSERT + // verify the interpreter's monitor has a non-null object + { + Label L; + __ ld_ptr(SCR1, Address(OSR_buf, slot_offset + 1 * BytesPerWord)); + __ bnez(SCR1, L); + __ stop("locked object is NULL"); + __ bind(L); + } +#endif + __ ld_ptr(S0, Address(OSR_buf, slot_offset + 0)); + __ st_ptr(S0, frame_map()->address_for_monitor_lock(i)); + __ ld_ptr(S0, Address(OSR_buf, slot_offset + 1*BytesPerWord)); + __ st_ptr(S0, frame_map()->address_for_monitor_object(i)); + } + } +} + +// inline cache check; done before the frame is built. 
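Before the inline cache check below, here is a standalone sketch of the OSR buffer offset arithmetic that osr_entry() above relies on. The values for max_locals and number_of_locks are made-up examples, BytesPerWord is assumed to be 8 on LoongArch64, and each monitor is assumed to occupy two words in the buffer (lock word, then the object oop), matching the two ld_ptr/st_ptr pairs in the loop above:

    #include <cstdio>

    int main() {
      const int BytesPerWord    = 8;  // 64-bit
      const int max_locals      = 3;  // example only
      const int number_of_locks = 2;  // example only

      // offset of the first (highest) monitor slot, as computed in osr_entry()
      int monitor_offset = BytesPerWord * max_locals +
                           (2 * BytesPerWord) * (number_of_locks - 1);

      for (int i = 0; i < number_of_locks; i++) {
        int slot_offset = monitor_offset - (i * 2) * BytesPerWord;
        // the lock word is read from slot_offset + 0 and the object oop from
        // slot_offset + BytesPerWord before being stored into the compiled frame
        std::printf("monitor %d: lock @ +%d, oop @ +%d\n",
                    i, slot_offset, slot_offset + BytesPerWord);
      }
      return 0;
    }

With the example values this prints monitor 0 at offsets +40/+48 and monitor 1 at +24/+32, i.e. monitors sit above the locals and are walked from the highest slot downwards.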
+int LIR_Assembler::check_icache() { + Register receiver = FrameMap::receiver_opr->as_register(); + Register ic_klass = IC_Klass; + int start_offset = __ offset(); + Label dont; + + __ verify_oop(receiver); + + // explicit NULL check not needed since load from [klass_offset] causes a trap + // check against inline cache + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), + "must add explicit null check"); + + __ load_klass(SCR2, receiver); + __ beq(SCR2, ic_klass, dont); + + // if icache check fails, then jump to runtime routine + // Note: RECEIVER must still contain the receiver! + __ jmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); + + // We align the verified entry point unless the method body + // (including its inline cache check) will fit in a single 64-byte + // icache line. + if (!method()->is_accessor() || __ offset() - start_offset > 4 * 4) { + // force alignment after the cache check. + __ align(CodeEntryAlignment); + } + + __ bind(dont); + return start_offset; +} + +void LIR_Assembler::jobject2reg(jobject o, Register reg) { + if (o == NULL) { + __ move(reg, R0); + } else { + int oop_index = __ oop_recorder()->find_index(o); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ patchable_li52(reg, (long)o); + } +} + +void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { + address target = NULL; + + switch (patching_id(info)) { + case PatchingStub::access_field_id: + target = Runtime1::entry_for(Runtime1::access_field_patching_id); + break; + case PatchingStub::load_klass_id: + target = Runtime1::entry_for(Runtime1::load_klass_patching_id); + break; + case PatchingStub::load_mirror_id: + target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); + break; + case PatchingStub::load_appendix_id: + target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); + break; + default: ShouldNotReachHere(); + } + + __ call(target, relocInfo::runtime_call_type); + add_call_info_here(info); +} + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { + deoptimize_trap(info); +} + +// This specifies the rsp decrement needed to build the frame +int LIR_Assembler::initial_frame_size_in_bytes() const { + // if rounding, must let FrameMap know! 
+ return in_bytes(frame_map()->framesize_in_bytes()); +} + +int LIR_Assembler::emit_exception_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(exception_handler_size); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + // the exception oop and pc are in A0, and A1 + // no other registers need to be preserved, so invalidate them + __ invalidate_registers(false, true, true, true, true, true); + + // check that there is really an exception + __ verify_not_null_oop(A0); + + // search an exception handler (A0: exception oop, A1: throwing pc) + __ call(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id), relocInfo::runtime_call_type); + __ should_not_reach_here(); + guarantee(code_offset() - offset <= exception_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + +// Emit the code to remove the frame from the stack in the exception unwind path. +int LIR_Assembler::emit_unwind_handler() { +#ifndef PRODUCT + if (CommentedAssembly) { + _masm->block_comment("Unwind handler"); + } +#endif + + int offset = code_offset(); + + // Fetch the exception from TLS and clear out exception related thread state + __ ld_ptr(A0, Address(TREG, JavaThread::exception_oop_offset())); + __ st_ptr(R0, Address(TREG, JavaThread::exception_oop_offset())); + __ st_ptr(R0, Address(TREG, JavaThread::exception_pc_offset())); + + __ bind(_unwind_handler_entry); + __ verify_not_null_oop(V0); + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ move(S0, V0); // Preserve the exception + } + + // Perform needed unlocking + MonitorExitStub* stub = NULL; + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::a0_opr); + stub = new MonitorExitStub(FrameMap::a0_opr, true, 0); + __ unlock_object(A5, A4, A0, *stub->entry()); + __ bind(*stub->continuation()); + } + + if (compilation()->env()->dtrace_method_probes()) { + __ mov_metadata(A1, method()->constant_encoding()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), TREG, A1); + } + + if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { + __ move(A0, S0); // Restore the exception + } + + // remove the activation and dispatch to the unwind handler + __ block_comment("remove_frame and dispatch to the unwind handler"); + __ remove_frame(initial_frame_size_in_bytes()); + __ jmp(Runtime1::entry_for(Runtime1::unwind_exception_id), relocInfo::runtime_call_type); + + // Emit the slow path assembly + if (stub != NULL) { + stub->emit_code(this); + } + + return offset; +} + +int LIR_Assembler::emit_deopt_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(deopt_handler_size); + if (handler_base == NULL) { + // not enough space left 
for the handler + bailout("deopt handler overflow"); + return -1; + } + + int offset = code_offset(); + + __ call(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type); + guarantee(code_offset() - offset <= deopt_handler_size, "overflow"); + __ end_a_stub(); + + return offset; +} + +void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { + _masm->code_section()->relocate(adr, relocInfo::poll_type); + int pc_offset = code_offset(); + flush_debug_info(pc_offset); + info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); + if (info->exception_handlers() != NULL) { + compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); + } +} + +void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == V0, + "word returns are in V0,"); + + // Pop the stack before the safepoint code + __ remove_frame(initial_frame_size_in_bytes()); + + __ li(SCR2, os::get_polling_page()); + __ relocate(relocInfo::poll_return_type); + __ ld_w(SCR1, SCR2, 0); + __ jr(RA); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { + guarantee(info != NULL, "Shouldn't be NULL"); + __ li(SCR2, os::get_polling_page()); + add_debug_info_for_branch(info); // This isn't just debug info: it's the oop map + __ relocate(relocInfo::poll_type); + __ ld_w(SCR1, SCR2, 0); + return __ offset(); +} + +void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { + __ move(to_reg, from_reg); +} + +void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } + +void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + + switch (c->type()) { + case T_INT: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ li(dest->as_register(), c->as_jint()); + break; + case T_ADDRESS: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ li(dest->as_register(), c->as_jint()); + break; + case T_LONG: + assert(patch_code == lir_patch_none, "no patching handled here"); + __ li(dest->as_register_lo(), (intptr_t)c->as_jlong()); + break; + case T_OBJECT: + if (patch_code == lir_patch_none) { + jobject2reg(c->as_jobject(), dest->as_register()); + } else { + jobject2reg_with_patching(dest->as_register(), info); + } + break; + case T_METADATA: + if (patch_code != lir_patch_none) { + klass2reg_with_patching(dest->as_register(), info); + } else { + __ mov_metadata(dest->as_register(), c->as_metadata()); + } + break; + case T_FLOAT: + __ relocate(relocInfo::internal_word_type); + __ patchable_li52(SCR1, (jlong) float_constant(c->as_jfloat())); + __ fld_s(dest->as_float_reg(), SCR1, 0); + break; + case T_DOUBLE: + __ relocate(relocInfo::internal_word_type); + __ patchable_li52(SCR1, (jlong) double_constant(c->as_jdouble())); + __ fld_d(dest->as_double_reg(), SCR1, 0); + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + LIR_Const* c = src->as_constant_ptr(); + switch (c->type()) { + case T_OBJECT: + if (!c->as_jobject()) + __ st_ptr(R0, frame_map()->address_for_slot(dest->single_stack_ix())); + else { + const2reg(src, FrameMap::scr1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::scr1_opr, dest, c->type(), false); + } + break; + case T_ADDRESS: + const2reg(src, 
FrameMap::scr1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::scr1_opr, dest, c->type(), false); + case T_INT: + case T_FLOAT: + if (c->as_jint_bits() == 0) + __ st_w(R0, frame_map()->address_for_slot(dest->single_stack_ix())); + else { + __ li(SCR2, c->as_jint_bits()); + __ st_w(SCR2, frame_map()->address_for_slot(dest->single_stack_ix())); + } + break; + case T_LONG: + case T_DOUBLE: + if (c->as_jlong_bits() == 0) + __ st_ptr(R0, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + else { + __ li(SCR2, (intptr_t)c->as_jlong_bits()); + __ st_ptr(SCR2, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + } + break; + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, + CodeEmitInfo* info, bool wide) { + assert(src->is_constant(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + LIR_Address* to_addr = dest->as_address_ptr(); + + void (Assembler::* insn)(Register Rt, Address adr); + + switch (type) { + case T_ADDRESS: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_d; + break; + case T_LONG: + assert(c->as_jlong() == 0, "should be"); + insn = &Assembler::st_d; + break; + case T_INT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_w; + break; + case T_OBJECT: + case T_ARRAY: + assert(c->as_jobject() == 0, "should be"); + if (UseCompressedOops && !wide) { + insn = &Assembler::st_w; + } else { + insn = &Assembler::st_d; + } + break; + case T_CHAR: + case T_SHORT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_h; + break; + case T_BOOLEAN: + case T_BYTE: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::st_b; + break; + default: + ShouldNotReachHere(); + insn = &Assembler::st_d; // unreachable + } + + if (info) add_debug_info_for_null_check_here(info); + (_masm->*insn)(R0, as_Address(to_addr)); +} + +void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + // move between cpu-registers + if (dest->is_single_cpu()) { + if (src->type() == T_LONG) { + // Can do LONG -> OBJECT + move_regs(src->as_register_lo(), dest->as_register()); + return; + } + assert(src->is_single_cpu(), "must match"); + if (src->type() == T_OBJECT) { + __ verify_oop(src->as_register()); + } + move_regs(src->as_register(), dest->as_register()); + } else if (dest->is_double_cpu()) { + if (is_reference_type(src->type())) { + // Surprising to me but we can see move of a long to t_object + __ verify_oop(src->as_register()); + move_regs(src->as_register(), dest->as_register_lo()); + return; + } + assert(src->is_double_cpu(), "must match"); + Register f_lo = src->as_register_lo(); + Register f_hi = src->as_register_hi(); + Register t_lo = dest->as_register_lo(); + Register t_hi = dest->as_register_hi(); + assert(f_hi == f_lo, "must be same"); + assert(t_hi == t_lo, "must be same"); + move_regs(f_lo, t_lo); + } else if (dest->is_single_fpu()) { + __ fmov_s(dest->as_float_reg(), src->as_float_reg()); + } else if (dest->is_double_fpu()) { + __ fmov_d(dest->as_double_reg(), src->as_double_reg()); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + precond(src->is_register() && dest->is_stack()); + + uint const c_sz32 = sizeof(uint32_t); + uint const c_sz64 = sizeof(uint64_t); + + if (src->is_single_cpu()) 
{ + int index = dest->single_stack_ix(); + if (is_reference_type(type)) { + __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); + __ verify_oop(src->as_register()); + } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { + __ st_ptr(src->as_register(), stack_slot_address(index, c_sz64)); + } else { + __ st_w(src->as_register(), stack_slot_address(index, c_sz32)); + } + } else if (src->is_double_cpu()) { + int index = dest->double_stack_ix(); + Address dest_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); + __ st_ptr(src->as_register_lo(), dest_addr_LO); + } else if (src->is_single_fpu()) { + int index = dest->single_stack_ix(); + __ fst_s(src->as_float_reg(), stack_slot_address(index, c_sz32)); + } else if (src->is_double_fpu()) { + int index = dest->double_stack_ix(); + __ fst_d(src->as_double_reg(), stack_slot_address(index, c_sz64)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, + CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + PatchingStub* patch = NULL; + Register compressed_src = SCR2; + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (is_reference_type(type)) { + __ verify_oop(src->as_register()); + + if (UseCompressedOops && !wide) { + __ encode_heap_oop(compressed_src, src->as_register()); + } else { + compressed_src = src->as_register(); + } + } + + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: + __ fst_s(src->as_float_reg(), as_Address(to_addr)); + break; + case T_DOUBLE: + __ fst_d(src->as_double_reg(), as_Address(to_addr)); + break; + case T_ARRAY: // fall through + case T_OBJECT: // fall through + if (UseCompressedOops && !wide) { + __ st_w(compressed_src, as_Address(to_addr)); + } else { + __ st_ptr(compressed_src, as_Address(to_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. 
+ ShouldNotReachHere(); + __ st_ptr(src->as_register(), as_Address(to_addr)); + break; + case T_ADDRESS: + __ st_ptr(src->as_register(), as_Address(to_addr)); + break; + case T_INT: + __ st_w(src->as_register(), as_Address(to_addr)); + break; + case T_LONG: + __ st_ptr(src->as_register_lo(), as_Address_lo(to_addr)); + break; + case T_BYTE: // fall through + case T_BOOLEAN: + __ st_b(src->as_register(), as_Address(to_addr)); + break; + case T_CHAR: // fall through + case T_SHORT: + __ st_h(src->as_register(), as_Address(to_addr)); + break; + default: + ShouldNotReachHere(); + } + if (info != NULL) { + add_debug_info_for_null_check(null_check_here, info); + } +} + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + precond(src->is_stack() && dest->is_register()); + + uint const c_sz32 = sizeof(uint32_t); + uint const c_sz64 = sizeof(uint64_t); + + if (dest->is_single_cpu()) { + int index = src->single_stack_ix(); + if (is_reference_type(type)) { + __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); + __ verify_oop(dest->as_register()); + } else if (type == T_METADATA || type == T_ADDRESS) { + __ ld_ptr(dest->as_register(), stack_slot_address(index, c_sz64)); + } else { + __ ld_w(dest->as_register(), stack_slot_address(index, c_sz32)); + } + } else if (dest->is_double_cpu()) { + int index = src->double_stack_ix(); + Address src_addr_LO = stack_slot_address(index, c_sz64, lo_word_offset_in_bytes); + __ ld_ptr(dest->as_register_lo(), src_addr_LO); + } else if (dest->is_single_fpu()) { + int index = src->single_stack_ix(); + __ fld_s(dest->as_float_reg(), stack_slot_address(index, c_sz32)); + } else if (dest->is_double_fpu()) { + int index = src->double_stack_ix(); + __ fld_d(dest->as_double_reg(), stack_slot_address(index, c_sz64)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { + address target = NULL; + + switch (patching_id(info)) { + case PatchingStub::access_field_id: + target = Runtime1::entry_for(Runtime1::access_field_patching_id); + break; + case PatchingStub::load_klass_id: + target = Runtime1::entry_for(Runtime1::load_klass_patching_id); + break; + case PatchingStub::load_mirror_id: + target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); + break; + case PatchingStub::load_appendix_id: + target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); + break; + default: ShouldNotReachHere(); + } + + __ call(target, relocInfo::runtime_call_type); + add_call_info_here(info); +} + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + LIR_Opr temp; + + if (type == T_LONG || type == T_DOUBLE) + temp = FrameMap::scr1_long_opr; + else + temp = FrameMap::scr1_opr; + + stack2reg(src, temp, src->type()); + reg2stack(temp, dest, dest->type(), false); +} + +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + LIR_Address* addr = src->as_address_ptr(); + LIR_Address* from_addr = src->as_address_ptr(); + + if (addr->base()->type() == T_OBJECT) { + __ verify_oop(addr->base()->as_pointer_register()); + } + + if (patch_code != lir_patch_none) { + deoptimize_trap(info); + return; + } + + if (info != NULL) { + add_debug_info_for_null_check_here(info); + } + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: + __ fld_s(dest->as_float_reg(), as_Address(from_addr)); + break; + case T_DOUBLE: + __ fld_d(dest->as_double_reg(), 
as_Address(from_addr)); + break; + case T_ARRAY: // fall through + case T_OBJECT: // fall through + if (UseCompressedOops && !wide) { + __ ld_wu(dest->as_register(), as_Address(from_addr)); + } else { + __ ld_ptr(dest->as_register(), as_Address(from_addr)); + } + break; + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. + ShouldNotReachHere(); + __ ld_ptr(dest->as_register(), as_Address(from_addr)); + break; + case T_ADDRESS: + // FIXME: OMG this is a horrible kludge. Any offset from an + // address that matches klass_offset_in_bytes() will be loaded + // as a word, not a long. + if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { + __ ld_wu(dest->as_register(), as_Address(from_addr)); + } else { + __ ld_ptr(dest->as_register(), as_Address(from_addr)); + } + break; + case T_INT: + __ ld_w(dest->as_register(), as_Address(from_addr)); + break; + case T_LONG: + __ ld_ptr(dest->as_register_lo(), as_Address_lo(from_addr)); + break; + case T_BYTE: + __ ld_b(dest->as_register(), as_Address(from_addr)); + break; + case T_BOOLEAN: + __ ld_bu(dest->as_register(), as_Address(from_addr)); + break; + case T_CHAR: + __ ld_hu(dest->as_register(), as_Address(from_addr)); + break; + case T_SHORT: + __ ld_h(dest->as_register(), as_Address(from_addr)); + break; + default: + ShouldNotReachHere(); + } + + if (is_reference_type(type)) { + if (UseCompressedOops && !wide) { + __ decode_heap_oop(dest->as_register()); + } + + // Load barrier has not yet been applied, so ZGC can't verify the oop here + __ verify_oop(dest->as_register()); + } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { + if (UseCompressedClassPointers) { + __ decode_klass_not_null(dest->as_register()); + } + } +} + +void LIR_Assembler::prefetchr(LIR_Opr src) { Unimplemented(); } + +void LIR_Assembler::prefetchw(LIR_Opr src) { Unimplemented(); } + +int LIR_Assembler::array_element_size(BasicType type) const { + int elem_size = type2aelembytes(type); + return exact_log2(elem_size); +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + switch (op->code()) { + case lir_idiv: + case lir_irem: + arithmetic_idiv(op->code(), op->in_opr1(), op->in_opr2(), op->in_opr3(), + op->result_opr(), op->info()); + break; + default: + ShouldNotReachHere(); + break; + } +} + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + assert(op->cond() == lir_cond_always, "must be"); +#endif + + if (op->info() != NULL) + add_debug_info_for_branch(op->info()); + + __ b_far(*(op->label())); +} + +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); +#endif + + if (op->info() != NULL) { + assert(op->in_opr1()->is_address() || op->in_opr2()->is_address(), + "shouldn't be codeemitinfo for non-address operands"); + add_debug_info_for_null_check_here(op->info()); // exception possible + } + + Label& L = *(op->label()); + Assembler::Condition acond; + LIR_Opr opr1 = op->in_opr1(); + LIR_Opr opr2 = 
op->in_opr2(); + assert(op->condition() != lir_cond_always, "must be"); + + if (op->code() == lir_cmp_float_branch) { + bool is_unordered = (op->ublock() == op->block()); + if (opr1->is_single_fpu()) { + FloatRegister reg1 = opr1->as_float_reg(); + assert(opr2->is_single_fpu(), "expect single float register"); + FloatRegister reg2 = opr2->as_float_reg(); + switch(op->condition()) { + case lir_cond_equal: + if (is_unordered) + __ fcmp_cueq_s(FCC0, reg1, reg2); + else + __ fcmp_ceq_s(FCC0, reg1, reg2); + break; + case lir_cond_notEqual: + if (is_unordered) + __ fcmp_cune_s(FCC0, reg1, reg2); + else + __ fcmp_cne_s(FCC0, reg1, reg2); + break; + case lir_cond_less: + if (is_unordered) + __ fcmp_cult_s(FCC0, reg1, reg2); + else + __ fcmp_clt_s(FCC0, reg1, reg2); + break; + case lir_cond_lessEqual: + if (is_unordered) + __ fcmp_cule_s(FCC0, reg1, reg2); + else + __ fcmp_cle_s(FCC0, reg1, reg2); + break; + case lir_cond_greaterEqual: + if (is_unordered) + __ fcmp_cule_s(FCC0, reg2, reg1); + else + __ fcmp_cle_s(FCC0, reg2, reg1); + break; + case lir_cond_greater: + if (is_unordered) + __ fcmp_cult_s(FCC0, reg2, reg1); + else + __ fcmp_clt_s(FCC0, reg2, reg1); + break; + default: + ShouldNotReachHere(); + } + } else if (opr1->is_double_fpu()) { + FloatRegister reg1 = opr1->as_double_reg(); + assert(opr2->is_double_fpu(), "expect double float register"); + FloatRegister reg2 = opr2->as_double_reg(); + switch(op->condition()) { + case lir_cond_equal: + if (is_unordered) + __ fcmp_cueq_d(FCC0, reg1, reg2); + else + __ fcmp_ceq_d(FCC0, reg1, reg2); + break; + case lir_cond_notEqual: + if (is_unordered) + __ fcmp_cune_d(FCC0, reg1, reg2); + else + __ fcmp_cne_d(FCC0, reg1, reg2); + break; + case lir_cond_less: + if (is_unordered) + __ fcmp_cult_d(FCC0, reg1, reg2); + else + __ fcmp_clt_d(FCC0, reg1, reg2); + break; + case lir_cond_lessEqual: + if (is_unordered) + __ fcmp_cule_d(FCC0, reg1, reg2); + else + __ fcmp_cle_d(FCC0, reg1, reg2); + break; + case lir_cond_greaterEqual: + if (is_unordered) + __ fcmp_cule_d(FCC0, reg2, reg1); + else + __ fcmp_cle_d(FCC0, reg2, reg1); + break; + case lir_cond_greater: + if (is_unordered) + __ fcmp_cult_d(FCC0, reg2, reg1); + else + __ fcmp_clt_d(FCC0, reg2, reg1); + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + __ bcnez(FCC0, L); + } else { + if (opr1->is_constant() && opr2->is_single_cpu()) { + // tableswitch + Unimplemented(); + } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { + Register reg1 = as_reg(opr1); + Register reg2 = noreg; + jlong imm2 = 0; + if (opr2->is_single_cpu()) { + // cpu register - cpu register + reg2 = opr2->as_register(); + } else if (opr2->is_double_cpu()) { + // cpu register - cpu register + reg2 = opr2->as_register_lo(); + } else if (opr2->is_constant()) { + switch(opr2->type()) { + case T_INT: + case T_ADDRESS: + imm2 = opr2->as_constant_ptr()->as_jint(); + break; + case T_LONG: + imm2 = opr2->as_constant_ptr()->as_jlong(); + break; + case T_METADATA: + imm2 = (intptr_t)opr2->as_constant_ptr()->as_metadata(); + break; + case T_OBJECT: + case T_ARRAY: + if (opr2->as_constant_ptr()->as_jobject() != NULL) { + reg2 = SCR1; + jobject2reg(opr2->as_constant_ptr()->as_jobject(), reg2); + } else { + reg2 = R0; + } + break; + default: + ShouldNotReachHere(); + break; + } + } else { + ShouldNotReachHere(); + } + if (reg2 == noreg) { + if (imm2 == 0) { + reg2 = R0; + } else { + reg2 = SCR1; + __ li(reg2, imm2); + } + } + switch (op->condition()) { + case lir_cond_equal: + __ beq_far(reg1, reg2, L); 
break; + case lir_cond_notEqual: + __ bne_far(reg1, reg2, L); break; + case lir_cond_less: + __ blt_far(reg1, reg2, L, true); break; + case lir_cond_lessEqual: + __ bge_far(reg2, reg1, L, true); break; + case lir_cond_greaterEqual: + __ bge_far(reg1, reg2, L, true); break; + case lir_cond_greater: + __ blt_far(reg2, reg1, L, true); break; + case lir_cond_belowEqual: + __ bge_far(reg2, reg1, L, false); break; + case lir_cond_aboveEqual: + __ bge_far(reg1, reg2, L, false); break; + default: + ShouldNotReachHere(); + } + } + } +} + +void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); + LIR_Opr tmp = op->tmp(); + + switch (op->bytecode()) { + case Bytecodes::_i2f: + __ movgr2fr_w(dest->as_float_reg(), src->as_register()); + __ ffint_s_w(dest->as_float_reg(), dest->as_float_reg()); + break; + case Bytecodes::_i2d: + __ movgr2fr_w(dest->as_double_reg(), src->as_register()); + __ ffint_d_w(dest->as_double_reg(), dest->as_double_reg()); + break; + case Bytecodes::_l2d: + __ movgr2fr_d(dest->as_double_reg(), src->as_register_lo()); + __ ffint_d_l(dest->as_double_reg(), dest->as_double_reg()); + break; + case Bytecodes::_l2f: + __ movgr2fr_d(dest->as_float_reg(), src->as_register_lo()); + __ ffint_s_l(dest->as_float_reg(), dest->as_float_reg()); + break; + case Bytecodes::_f2d: + __ fcvt_d_s(dest->as_double_reg(), src->as_float_reg()); + break; + case Bytecodes::_d2f: + __ fcvt_s_d(dest->as_float_reg(), src->as_double_reg()); + break; + case Bytecodes::_i2c: + __ bstrpick_w(dest->as_register(), src->as_register(), 15, 0); + break; + case Bytecodes::_i2l: + _masm->block_comment("FIXME: This could be a no-op"); + __ slli_w(dest->as_register_lo(), src->as_register(), 0); + break; + case Bytecodes::_i2s: + __ ext_w_h(dest->as_register(), src->as_register()); + break; + case Bytecodes::_i2b: + __ ext_w_b(dest->as_register(), src->as_register()); + break; + case Bytecodes::_l2i: + __ slli_w(dest->as_register(), src->as_register_lo(), 0); + break; + case Bytecodes::_d2l: + __ ftintrz_l_d(tmp->as_double_reg(), src->as_double_reg()); + __ movfr2gr_d(dest->as_register_lo(), tmp->as_double_reg()); + break; + case Bytecodes::_f2i: + __ ftintrz_w_s(tmp->as_float_reg(), src->as_float_reg()); + __ movfr2gr_s(dest->as_register(), tmp->as_float_reg()); + break; + case Bytecodes::_f2l: + __ ftintrz_l_s(tmp->as_float_reg(), src->as_float_reg()); + __ movfr2gr_d(dest->as_register_lo(), tmp->as_float_reg()); + break; + case Bytecodes::_d2i: + __ ftintrz_w_d(tmp->as_double_reg(), src->as_double_reg()); + __ movfr2gr_s(dest->as_register(), tmp->as_double_reg()); + break; + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { + if (op->init_check()) { + __ ld_bu(SCR1, Address(op->klass()->as_register(), InstanceKlass::init_state_offset())); + __ li(SCR2, InstanceKlass::fully_initialized); + add_debug_info_for_null_check_here(op->stub()->info()); + __ bne_far(SCR1, SCR2, *op->stub()->entry()); + } + __ allocate_object(op->obj()->as_register(), op->tmp1()->as_register(), + op->tmp2()->as_register(), op->header_size(), + op->object_size(), op->klass()->as_register(), + *op->stub()->entry()); + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { + Register len = op->len()->as_register(); + if (UseSlowPath || + (!UseFastNewObjectArray && is_reference_type(op->type())) || + (!UseFastNewTypeArray && !is_reference_type(op->type()))) { + __ b(*op->stub()->entry()); + 
} else { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + Register tmp3 = op->tmp3()->as_register(); + if (len == tmp1) { + tmp1 = tmp3; + } else if (len == tmp2) { + tmp2 = tmp3; + } else if (len == tmp3) { + // everything is ok + } else { + __ move(tmp3, len); + } + __ allocate_array(op->obj()->as_register(), len, tmp1, tmp2, + arrayOopDesc::header_size(op->type()), + array_element_size(op->type()), + op->klass()->as_register(), + *op->stub()->entry()); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::type_profile_helper(Register mdo, ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done) { + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + // See if the receiver is receiver[n]. + __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ ld_ptr(SCR1, Address(SCR2)); + __ bne(recv, SCR1, next_test); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); + __ ld_ptr(SCR2, data_addr); + __ addi_d(SCR2, SCR2, DataLayout::counter_increment); + __ st_ptr(SCR2, data_addr); + __ b(*update_done); + __ bind(next_test); + } + + // Didn't find receiver; find next empty slot and fill it in + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + Address recv_addr(SCR2); + __ ld_ptr(SCR1, recv_addr); + __ bnez(SCR1, next_test); + __ st_ptr(recv, recv_addr); + __ li(SCR1, DataLayout::counter_increment); + __ lea(SCR2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ st_ptr(SCR1, Address(SCR2)); + __ b(*update_done); + __ bind(next_test); + } +} + +void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, + Label* failure, Label* obj_is_null) { + // we always need a stub for the failure case. + CodeStub* stub = op->stub(); + Register obj = op->object()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register dst = op->result_opr()->as_register(); + ciKlass* k = op->klass(); + Register Rtmp1 = noreg; + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + const bool should_profile = op->should_profile(); + + if (should_profile) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + + Label profile_cast_success, profile_cast_failure; + Label *success_target = should_profile ? &profile_cast_success : success; + Label *failure_target = should_profile ? 
&profile_cast_failure : failure; + + if (obj == k_RInfo) { + k_RInfo = dst; + } else if (obj == klass_RInfo) { + klass_RInfo = dst; + } + if (k->is_loaded() && !UseCompressedClassPointers) { + select_different_registers(obj, dst, k_RInfo, klass_RInfo); + } else { + Rtmp1 = op->tmp3()->as_register(); + select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); + } + + assert_different_registers(obj, k_RInfo, klass_RInfo); + + if (should_profile) { + Label not_null; + __ bnez(obj, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ ld_bu(SCR2, data_addr); + __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); + __ st_b(SCR2, data_addr); + __ b(*obj_is_null); + __ bind(not_null); + } else { + __ beqz(obj, *obj_is_null); + } + + if (!k->is_loaded()) { + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + } else { + __ mov_metadata(k_RInfo, k->constant_encoding()); + } + __ verify_oop(obj); + + if (op->fast_check()) { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(SCR2, obj); + __ bne_far(SCR2, k_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } else { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit + __ ld_ptr(SCR1, Address(klass_RInfo, int64_t(k->super_check_offset()))); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ bne_far(k_RInfo, SCR1, *failure_target); + // successful cast, fall through to profile or jump + } else { + // See if we get an immediate positive hit + __ beq_far(k_RInfo, SCR1, *success_target); + // check for self + __ beq_far(klass_RInfo, k_RInfo, *success_target); + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(klass_RInfo, Address(SP, 0 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + // result is a boolean + __ beqz(klass_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } else { + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + // result is a boolean + __ beqz(k_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } + if (should_profile) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, obj); + Label update_done; + type_profile_helper(mdo, md, data, recv, success); + __ b(*success); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr = Address(mdo, 
md->byte_offset_of_slot(data, CounterData::count_offset())); + __ ld_ptr(SCR2, counter_addr); + __ addi_d(SCR2, SCR2, -DataLayout::counter_increment); + __ st_ptr(SCR2, counter_addr); + __ b(*failure); + } + __ b(*success); +} + +void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { + const bool should_profile = op->should_profile(); + + LIR_Code code = op->code(); + if (code == lir_store_check) { + Register value = op->object()->as_register(); + Register array = op->array()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register Rtmp1 = op->tmp3()->as_register(); + CodeStub* stub = op->stub(); + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + if (should_profile) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + Label profile_cast_success, profile_cast_failure, done; + Label *success_target = should_profile ? &profile_cast_success : &done; + Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); + + if (should_profile) { + Label not_null; + __ bnez(value, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr = Address(mdo, md->byte_offset_of_slot(data, DataLayout::flags_offset())); + __ ld_bu(SCR2, data_addr); + __ ori(SCR2, SCR2, BitData::null_seen_byte_constant()); + __ st_b(SCR2, data_addr); + __ b(done); + __ bind(not_null); + } else { + __ beqz(value, done); + } + + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(k_RInfo, array); + __ load_klass(klass_RInfo, value); + + // get instance klass (it's already uncompressed) + __ ld_ptr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ st_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(k_RInfo, Address(SP, 0 * wordSize)); + __ ld_ptr(klass_RInfo, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + // result is a boolean + __ beqz(k_RInfo, *failure_target); + // fall through to the success case + + if (should_profile) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, value); + Label update_done; + type_profile_helper(mdo, md, data, recv, &done); + __ b(done); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ lea(SCR2, counter_addr); + __ ld_ptr(SCR1, Address(SCR2)); + __ addi_d(SCR1, SCR1, -DataLayout::counter_increment); + __ st_ptr(SCR1, Address(SCR2)); + __ b(*stub->entry()); + } + + __ bind(done); + } else if (code == lir_checkcast) { + Register obj = op->object()->as_register(); + Register dst = 
op->result_opr()->as_register(); + Label success; + emit_typecheck_helper(op, &success, op->stub()->entry(), &success); + __ bind(success); + if (dst != obj) { + __ move(dst, obj); + } + } else if (code == lir_instanceof) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success, failure, done; + emit_typecheck_helper(op, &success, &failure, &failure); + __ bind(failure); + __ move(dst, R0); + __ b(done); + __ bind(success); + __ li(dst, 1); + __ bind(done); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, bool sign) { + __ cmpxchg32(Address(addr, 0), cmpval, newval, SCR1, sign, + /* retold */ false, /* barrier */ true); +} + +void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { + __ cmpxchg(Address(addr, 0), cmpval, newval, SCR1, + /* retold */ false, /* barrier */ true); +} + +void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + assert(VM_Version::supports_cx8(), "wrong machine"); + Register addr; + if (op->addr()->is_register()) { + addr = as_reg(op->addr()); + } else { + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); + assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register newval = as_reg(op->new_value()); + Register cmpval = as_reg(op->cmp_value()); + + if (op->code() == lir_cas_obj) { + if (UseCompressedOops) { + Register t1 = op->tmp1()->as_register(); + assert(op->tmp1()->is_valid(), "must be"); + __ encode_heap_oop(t1, cmpval); + cmpval = t1; + __ encode_heap_oop(SCR2, newval); + newval = SCR2; + casw(addr, newval, cmpval, false); + } else { + casl(addr, newval, cmpval); + } + } else if (op->code() == lir_cas_int) { + casw(addr, newval, cmpval, true); + } else { + casl(addr, newval, cmpval); + } +} + +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, + LIR_Opr result, BasicType type) { + Unimplemented(); +} + +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, + LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + assert(result->is_single_cpu() || result->is_double_cpu(), "expect single register for result"); + assert(left->is_single_cpu() || left->is_double_cpu(), "must be"); + Register regd = (result->type() == T_LONG) ? 
result->as_register_lo() : result->as_register(); + Register regl = as_reg(left); + Register regr = noreg; + Register reg1 = noreg; + Register reg2 = noreg; + jlong immr = 0; + + // comparison operands + if (right->is_single_cpu()) { + // cpu register - cpu register + regr = right->as_register(); + } else if (right->is_double_cpu()) { + // cpu register - cpu register + regr = right->as_register_lo(); + } else if (right->is_constant()) { + switch(right->type()) { + case T_INT: + case T_ADDRESS: + immr = right->as_constant_ptr()->as_jint(); + break; + case T_LONG: + immr = right->as_constant_ptr()->as_jlong(); + break; + case T_METADATA: + immr = (intptr_t)right->as_constant_ptr()->as_metadata(); + break; + case T_OBJECT: + case T_ARRAY: + if (right->as_constant_ptr()->as_jobject() != NULL) { + regr = SCR1; + jobject2reg(right->as_constant_ptr()->as_jobject(), regr); + } else { + immr = 0; + } + break; + default: + ShouldNotReachHere(); + break; + } + } else { + ShouldNotReachHere(); + } + + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + case lir_cond_notEqual: + if (!Assembler::is_simm(-immr, 12)) { + regr = SCR1; + __ li(regr, immr); + } + break; + default: + if (!Assembler::is_simm(immr, 12)) { + regr = SCR1; + __ li(regr, immr); + } + } + } + + // special cases + if (src1->is_constant() && src2->is_constant()) { + jlong val1 = 0, val2 = 0; + if (src1->type() == T_INT && src2->type() == T_INT) { + val1 = src1->as_jint(); + val2 = src2->as_jint(); + } else if (src1->type() == T_LONG && src2->type() == T_LONG) { + val1 = src1->as_jlong(); + val2 = src2->as_jlong(); + } + if (val1 == 0 && val2 == 1) { + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + } + break; + case lir_cond_notEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + __ xori(regd, regd, 1); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + } + break; + case lir_cond_less: + __ slti(regd, regl, immr); + __ xori(regd, regd, 1); + break; + case lir_cond_lessEqual: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + break; + case lir_cond_greater: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + __ xori(regd, regd, 1); + break; + case lir_cond_greaterEqual: + __ slti(regd, regl, immr); + break; + case lir_cond_belowEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ sltu(regd, SCR1, regl); + } + break; + case lir_cond_aboveEqual: + __ sltui(regd, regl, immr); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (condition) { + case lir_cond_equal: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + break; + case lir_cond_notEqual: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + break; + case lir_cond_less: + __ slt(regd, regl, regr); + __ xori(regd, regd, 1); + break; + case lir_cond_lessEqual: + __ slt(regd, regr, regl); + break; + case lir_cond_greater: + __ slt(regd, regr, regl); + __ xori(regd, regd, 1); + break; + case lir_cond_greaterEqual: + __ slt(regd, regl, regr); + break; + case lir_cond_belowEqual: + __ sltu(regd, regr, regl); + break; + case lir_cond_aboveEqual: + __ sltu(regd, regl, regr); + break; + default: + ShouldNotReachHere(); + } + } + return; + } else 
if (val1 == 1 && val2 == 0) { + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + if (immr == 0) { + __ sltu(regd, R0, regl); + __ xori(regd, regd, 1); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + } + break; + case lir_cond_notEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ addi_d(SCR1, regl, -immr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + } + break; + case lir_cond_less: + __ slti(regd, regl, immr); + break; + case lir_cond_lessEqual: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + __ xori(regd, regd, 1); + break; + case lir_cond_greater: + if (immr == 0) { + __ slt(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ slt(regd, SCR1, regl); + } + break; + case lir_cond_greaterEqual: + __ slti(regd, regl, immr); + __ xori(regd, regd, 1); + break; + case lir_cond_belowEqual: + if (immr == 0) { + __ sltu(regd, R0, regl); + } else { + __ li(SCR1, immr); + __ sltu(regd, SCR1, regl); + } + __ xori(regd, regd, 1); + break; + case lir_cond_aboveEqual: + __ sltui(regd, regl, immr); + __ xori(regd, regd, 1); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (condition) { + case lir_cond_equal: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ masknez(regd, regd, SCR1); + break; + case lir_cond_notEqual: + __ sub_d(SCR1, regl, regr); + __ li(regd, 1); + __ maskeqz(regd, regd, SCR1); + break; + case lir_cond_less: + __ slt(regd, regl, regr); + break; + case lir_cond_lessEqual: + __ slt(regd, regr, regl); + __ xori(regd, regd, 1); + break; + case lir_cond_greater: + __ slt(regd, regr, regl); + break; + case lir_cond_greaterEqual: + __ slt(regd, regl, regr); + __ xori(regd, regd, 1); + break; + case lir_cond_belowEqual: + __ sltu(regd, regr, regl); + __ xori(regd, regd, 1); + break; + case lir_cond_aboveEqual: + __ sltu(regd, regl, regr); + __ xori(regd, regd, 1); + break; + default: + ShouldNotReachHere(); + } + } + return; + } + } + + // cmp + if (regr == noreg) { + switch (condition) { + case lir_cond_equal: + __ addi_d(SCR2, regl, -immr); + break; + case lir_cond_notEqual: + __ addi_d(SCR2, regl, -immr); + break; + case lir_cond_less: + __ slti(SCR2, regl, immr); + break; + case lir_cond_lessEqual: + __ li(SCR1, immr); + __ slt(SCR2, SCR1, regl); + break; + case lir_cond_greater: + __ li(SCR1, immr); + __ slt(SCR2, SCR1, regl); + break; + case lir_cond_greaterEqual: + __ slti(SCR2, regl, immr); + break; + case lir_cond_belowEqual: + __ li(SCR1, immr); + __ sltu(SCR2, SCR1, regl); + break; + case lir_cond_aboveEqual: + __ sltui(SCR2, regl, immr); + break; + default: + ShouldNotReachHere(); + } + } else { + switch (condition) { + case lir_cond_equal: + __ sub_d(SCR2, regl, regr); + break; + case lir_cond_notEqual: + __ sub_d(SCR2, regl, regr); + break; + case lir_cond_less: + __ slt(SCR2, regl, regr); + break; + case lir_cond_lessEqual: + __ slt(SCR2, regr, regl); + break; + case lir_cond_greater: + __ slt(SCR2, regr, regl); + break; + case lir_cond_greaterEqual: + __ slt(SCR2, regl, regr); + break; + case lir_cond_belowEqual: + __ sltu(SCR2, regr, regl); + break; + case lir_cond_aboveEqual: + __ sltu(SCR2, regl, regr); + break; + default: + ShouldNotReachHere(); + } + } + + // value operands + if (src1->is_stack()) { + stack2reg(src1, result, result->type()); + reg1 = regd; + } else if (src1->is_constant()) { + const2reg(src1, result, lir_patch_none, NULL); + reg1 = regd; + } else { + reg1 = (src1->type() == T_LONG) 
? src1->as_register_lo() : src1->as_register(); + } + + if (src2->is_stack()) { + stack2reg(src2, FrameMap::scr1_opr, result->type()); + reg2 = SCR1; + } else if (src2->is_constant()) { + LIR_Opr tmp = src2->type() == T_LONG ? FrameMap::scr1_long_opr : FrameMap::scr1_opr; + const2reg(src2, tmp, lir_patch_none, NULL); + reg2 = SCR1; + } else { + reg2 = (src2->type() == T_LONG) ? src2->as_register_lo() : src2->as_register(); + } + + // cmove + switch (condition) { + case lir_cond_equal: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_notEqual: + __ maskeqz(regd, reg1, SCR2); + __ masknez(SCR2, reg2, SCR2); + break; + case lir_cond_less: + __ maskeqz(regd, reg1, SCR2); + __ masknez(SCR2, reg2, SCR2); + break; + case lir_cond_lessEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_greater: + __ maskeqz(regd, reg1, SCR2); + __ masknez(SCR2, reg2, SCR2); + break; + case lir_cond_greaterEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_belowEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + case lir_cond_aboveEqual: + __ masknez(regd, reg1, SCR2); + __ maskeqz(SCR2, reg2, SCR2); + break; + default: + ShouldNotReachHere(); + } + + __ OR(regd, regd, SCR2); +} + +void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + + if (left->is_single_cpu()) { + Register lreg = left->as_register(); + Register dreg = as_reg(dest); + + if (right->is_single_cpu()) { + // cpu register - cpu register + assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, "should be"); + Register rreg = right->as_register(); + switch (code) { + case lir_add: __ add_w (dest->as_register(), lreg, rreg); break; + case lir_sub: __ sub_w (dest->as_register(), lreg, rreg); break; + case lir_mul: __ mul_w (dest->as_register(), lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_double_cpu()) { + Register rreg = right->as_register_lo(); + // single_cpu + double_cpu: can happen with obj+long + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + switch (code) { + case lir_add: __ add_d(dreg, lreg, rreg); break; + case lir_sub: __ sub_d(dreg, lreg, rreg); break; + default: ShouldNotReachHere(); + } + } else if (right->is_constant()) { + // cpu register - constant + jlong c; + + // FIXME: This is fugly: we really need to factor all this logic. 
+ switch(right->type()) { + case T_LONG: + c = right->as_constant_ptr()->as_jlong(); + break; + case T_INT: + case T_ADDRESS: + c = right->as_constant_ptr()->as_jint(); + break; + default: + ShouldNotReachHere(); + c = 0; // unreachable + break; + } + + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + if (c == 0 && dreg == lreg) { + COMMENT("effective nop elided"); + return; + } + + switch(left->type()) { + case T_INT: + switch (code) { + case lir_add: __ addi_w(dreg, lreg, c); break; + case lir_sub: __ addi_w(dreg, lreg, -c); break; + default: ShouldNotReachHere(); + } + break; + case T_OBJECT: + case T_ADDRESS: + switch (code) { + case lir_add: __ addi_d(dreg, lreg, c); break; + case lir_sub: __ addi_d(dreg, lreg, -c); break; + default: ShouldNotReachHere(); + } + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + } else if (left->is_double_cpu()) { + Register lreg_lo = left->as_register_lo(); + + if (right->is_double_cpu()) { + // cpu register - cpu register + Register rreg_lo = right->as_register_lo(); + switch (code) { + case lir_add: __ add_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_sub: __ sub_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_mul: __ mul_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_div: __ div_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + case lir_rem: __ mod_d(dest->as_register_lo(), lreg_lo, rreg_lo); break; + default: ShouldNotReachHere(); + } + + } else if (right->is_constant()) { + jlong c = right->as_constant_ptr()->as_jlong(); + Register dreg = as_reg(dest); + switch (code) { + case lir_add: + case lir_sub: + if (c == 0 && dreg == lreg_lo) { + COMMENT("effective nop elided"); + return; + } + code == lir_add ? 
__ addi_d(dreg, lreg_lo, c) : __ addi_d(dreg, lreg_lo, -c); + break; + case lir_div: + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move lreg_lo to dreg if divisor is 1 + __ move(dreg, lreg_lo); + } else { + unsigned int shift = exact_log2(c); + // use scr1 as intermediate result register + __ srai_d(SCR1, lreg_lo, 63); + __ srli_d(SCR1, SCR1, 64 - shift); + __ add_d(SCR1, lreg_lo, SCR1); + __ srai_d(dreg, SCR1, shift); + } + break; + case lir_rem: + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ move(dreg, R0); + } else { + // use scr1/2 as intermediate result register + __ sub_d(SCR1, R0, lreg_lo); + __ slt(SCR2, SCR1, R0); + __ andi(dreg, lreg_lo, c - 1); + __ andi(SCR1, SCR1, c - 1); + __ sub_d(SCR1, R0, SCR1); + __ maskeqz(dreg, dreg, SCR2); + __ masknez(SCR1, SCR1, SCR2); + __ OR(dreg, dreg, SCR1); + } + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + } else if (left->is_single_fpu()) { + assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); + switch (code) { + case lir_add: __ fadd_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ fsub_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_div_strictfp: // fall through + case lir_div: __ fdiv_s (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: ShouldNotReachHere(); + } + } else if (left->is_double_fpu()) { + if (right->is_double_fpu()) { + // fpu register - fpu register + switch (code) { + case lir_add: __ fadd_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ fsub_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_mul_strictfp: // fall through + case lir_mul: __ fmul_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_div_strictfp: // fall through + case lir_div: __ fdiv_d (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: ShouldNotReachHere(); + } + } else { + if (right->is_constant()) { + ShouldNotReachHere(); + } + ShouldNotReachHere(); + } + } else if (left->is_single_stack() || left->is_address()) { + assert(left == dest, "left and dest must be equal"); + ShouldNotReachHere(); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, + int dest_index, bool pop_fpu_stack) { + Unimplemented(); +} + +void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { + switch(code) { + case lir_abs : __ fabs_d(dest->as_double_reg(), value->as_double_reg()); break; + case lir_sqrt: __ fsqrt_d(dest->as_double_reg(), value->as_double_reg()); break; + default : ShouldNotReachHere(); + } +} + +void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { + assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); + Register Rleft = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); + + if (dst->is_single_cpu()) { + Register Rdst = dst->as_register(); + if (right->is_constant()) { + switch (code) { + case lir_logic_and: + if (Assembler::is_uimm(right->as_jint(), 12)) { + __ andi(Rdst, Rleft, right->as_jint()); + } else { + __ li(AT, right->as_jint()); + __ AND(Rdst, Rleft, AT); + } + break; + case lir_logic_or: __ ori(Rdst, Rleft, right->as_jint()); break; + case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jint()); break; + default: ShouldNotReachHere(); break; + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); + switch (code) { + case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; + case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; + case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; + default: ShouldNotReachHere(); break; + } + } + } else { + Register Rdst = dst->as_register_lo(); + if (right->is_constant()) { + switch (code) { + case lir_logic_and: + if (Assembler::is_uimm(right->as_jlong(), 12)) { + __ andi(Rdst, Rleft, right->as_jlong()); + } else { + // We can guarantee that transform from HIR LogicOp is in range of + // uimm(12), but the common code directly generates LIR LogicAnd, + // and the right-operand is mask with all ones in the high bits. + __ li(AT, right->as_jlong()); + __ AND(Rdst, Rleft, AT); + } + break; + case lir_logic_or: __ ori(Rdst, Rleft, right->as_jlong()); break; + case lir_logic_xor: __ xori(Rdst, Rleft, right->as_jlong()); break; + default: ShouldNotReachHere(); break; + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : right->as_register_lo(); + switch (code) { + case lir_logic_and: __ AND(Rdst, Rleft, Rright); break; + case lir_logic_or: __ OR(Rdst, Rleft, Rright); break; + case lir_logic_xor: __ XOR(Rdst, Rleft, Rright); break; + default: ShouldNotReachHere(); break; + } + } + } +} + +void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, + LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) { + // opcode check + assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem"); + bool is_irem = (code == lir_irem); + + // operand check + assert(left->is_single_cpu(), "left must be register"); + assert(right->is_single_cpu() || right->is_constant(), "right must be register or constant"); + assert(result->is_single_cpu(), "result must be register"); + Register lreg = left->as_register(); + Register dreg = result->as_register(); + + // power-of-2 constant check and codegen + if (right->is_constant()) { + int c = right->as_constant_ptr()->as_jint(); + assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant"); + if (is_irem) { + if (c == 1) { + // move 0 to dreg if divisor is 1 + __ move(dreg, R0); + } else { + // use scr1/2 as intermediate result register + __ sub_w(SCR1, R0, lreg); + __ slt(SCR2, SCR1, R0); + __ andi(dreg, lreg, c - 1); + __ andi(SCR1, SCR1, c - 1); + __ sub_w(SCR1, R0, SCR1); + __ maskeqz(dreg, dreg, SCR2); + __ masknez(SCR1, SCR1, SCR2); + __ OR(dreg, dreg, SCR1); + } + } else { + if (c == 1) { + // move lreg to dreg if divisor is 1 + __ move(dreg, lreg); + } else { + unsigned int shift = exact_log2(c); + // use scr1 as intermediate result register + __ srai_w(SCR1, lreg, 31); + __ srli_w(SCR1, SCR1, 32 - shift); + __ add_w(SCR1, lreg, SCR1); + __ srai_w(dreg, SCR1, shift); + } + } + } else { + Register rreg = right->as_register(); + if (is_irem) + __ mod_w(dreg, lreg, rreg); + else + __ div_w(dreg, lreg, rreg); + } +} 
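+// Note on the power-of-2 special cases above (arithmetic_idiv, and the analogous
+// T_LONG constant cases in arith_op): the lir_idiv sequence srai/srli/add/srai
+// adds (c - 1) to a negative dividend before the arithmetic shift, roughly
+//   dreg = (lreg + ((lreg >> 31) >>> (32 - shift))) >> shift
+// which yields the round-toward-zero quotient Java requires. The lir_irem
+// sequence uses maskeqz/masknez to select, without branches, between
+// lreg & (c - 1) for a positive dividend and -((-lreg) & (c - 1)) otherwise,
+// i.e. the truncated remainder.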
+ +void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { + Unimplemented(); +} + +void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ + if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { + bool is_unordered_less = (code == lir_ucmp_fd2i); + if (left->is_single_fpu()) { + if (is_unordered_less) { + __ fcmp_clt_s(FCC0, right->as_float_reg(), left->as_float_reg()); + __ fcmp_cult_s(FCC1, left->as_float_reg(), right->as_float_reg()); + } else { + __ fcmp_cult_s(FCC0, right->as_float_reg(), left->as_float_reg()); + __ fcmp_clt_s(FCC1, left->as_float_reg(), right->as_float_reg()); + } + } else if (left->is_double_fpu()) { + if (is_unordered_less) { + __ fcmp_clt_d(FCC0, right->as_double_reg(), left->as_double_reg()); + __ fcmp_cult_d(FCC1, left->as_double_reg(), right->as_double_reg()); + } else { + __ fcmp_cult_d(FCC0, right->as_double_reg(), left->as_double_reg()); + __ fcmp_clt_d(FCC1, left->as_double_reg(), right->as_double_reg()); + } + } else { + ShouldNotReachHere(); + } + __ movcf2gr(dst->as_register(), FCC0); + __ movcf2gr(SCR1, FCC1); + __ sub_d(dst->as_register(), dst->as_register(), SCR1); + } else if (code == lir_cmp_l2i) { + __ slt(SCR1, left->as_register_lo(), right->as_register_lo()); + __ slt(dst->as_register(), right->as_register_lo(), left->as_register_lo()); + __ sub_d(dst->as_register(), dst->as_register(), SCR1); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::align_call(LIR_Code code) {} + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + address call = __ trampoline_call(AddressLiteral(op->addr(), rtype)); + if (call == NULL) { + bailout("trampoline stub overflow"); + return; + } + add_call_info(code_offset(), op->info()); +} + +void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + address call = __ ic_call(op->addr()); + if (call == NULL) { + bailout("trampoline stub overflow"); + return; + } + add_call_info(code_offset(), op->info()); +} + +/* Currently, vtable-dispatch is only enabled for sparc platforms */ +void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { + ShouldNotReachHere(); +} + +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + address stub = __ start_a_stub(call_stub_size); + if (stub == NULL) { + bailout("static call stub overflow"); + return; + } + + int start = __ offset(); + + __ relocate(static_stub_Relocation::spec(call_pc)); + + // Code stream for loading method may be changed. + __ ibar(0); + + // Rmethod contains Method*, it should be relocated for GC + // static stub relocation also tags the Method* in the code-stream. + __ mov_metadata(Rmethod, NULL); + // This is recognized as unresolved by relocs/nativeInst/ic code + __ patchable_jump(__ pc()); + + assert(__ offset() - start <= call_stub_size, "stub too big"); + __ end_a_stub(); +} + +void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { + assert(exceptionOop->as_register() == A0, "must match"); + assert(exceptionPC->as_register() == A1, "must match"); + + // exception object is not added to oop map by LinearScan + // (LinearScan assumes that no oops are in fixed registers) + info->add_register_oop(exceptionOop); + Runtime1::StubID unwind_id; + + // get current pc information + // pc is only needed if the method has an exception handler, the unwind code does not need it. 
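+  // Below, label L is bound at the lipc instruction itself, so lipc loads the
+  // throwing pc into the exception-pc register and add_call_info records the
+  // same code offset for the exception-handler lookup.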
+ if (compilation()->debug_info_recorder()->last_pc_offset() == __ offset()) { + // As no instructions have been generated yet for this LIR node it's + // possible that an oop map already exists for the current offset. + // In that case insert an dummy NOP here to ensure all oop map PCs + // are unique. See JDK-8237483. + __ nop(); + } + Label L; + int pc_for_athrow_offset = __ offset(); + __ bind(L); + __ lipc(exceptionPC->as_register(), L); + add_call_info(pc_for_athrow_offset, info); // for exception handler + + __ verify_not_null_oop(A0); + // search an exception handler (A0: exception oop, A1: throwing pc) + if (compilation()->has_fpu_code()) { + unwind_id = Runtime1::handle_exception_id; + } else { + unwind_id = Runtime1::handle_exception_nofpu_id; + } + __ call(Runtime1::entry_for(unwind_id), relocInfo::runtime_call_type); + + // FIXME: enough room for two byte trap ???? + __ nop(); +} + +void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { + assert(exceptionOop->as_register() == A0, "must match"); + __ b(_unwind_handler_entry); +} + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); + + switch (left->type()) { + case T_INT: { + switch (code) { + case lir_shl: __ sll_w(dreg, lreg, count->as_register()); break; + case lir_shr: __ sra_w(dreg, lreg, count->as_register()); break; + case lir_ushr: __ srl_w(dreg, lreg, count->as_register()); break; + default: ShouldNotReachHere(); break; + } + break; + case T_LONG: + case T_ADDRESS: + case T_OBJECT: + switch (code) { + case lir_shl: __ sll_d(dreg, lreg, count->as_register()); break; + case lir_shr: __ sra_d(dreg, lreg, count->as_register()); break; + case lir_ushr: __ srl_d(dreg, lreg, count->as_register()); break; + default: ShouldNotReachHere(); break; + } + break; + default: + ShouldNotReachHere(); + break; + } + } +} + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); + Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); + + switch (left->type()) { + case T_INT: { + switch (code) { + case lir_shl: __ slli_w(dreg, lreg, count); break; + case lir_shr: __ srai_w(dreg, lreg, count); break; + case lir_ushr: __ srli_w(dreg, lreg, count); break; + default: ShouldNotReachHere(); break; + } + break; + case T_LONG: + case T_ADDRESS: + case T_OBJECT: + switch (code) { + case lir_shl: __ slli_d(dreg, lreg, count); break; + case lir_shr: __ srai_d(dreg, lreg, count); break; + case lir_ushr: __ srli_d(dreg, lreg, count); break; + default: ShouldNotReachHere(); break; + } + break; + default: + ShouldNotReachHere(); + break; + } + } +} + +void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ st_ptr(r, Address(SP, offset_from_sp_in_bytes)); +} + +void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ li(SCR2, c); + __ st_ptr(SCR2, Address(SP, offset_from_sp_in_bytes)); +} + +void LIR_Assembler::store_parameter(jobject o, int offset_from_sp_in_words) { + ShouldNotReachHere(); +} + +// This code replaces a call to arraycopy; no exception may +// be thrown in this code, they must be thrown in the System.arraycopy +// activation frame; we could save some checks if this would not be the case +void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + Register j_rarg0 = T0; + Register j_rarg1 = A0; + Register j_rarg2 = A1; + Register j_rarg3 = A2; + Register j_rarg4 = A3; + + ciArrayKlass* default_type = op->expected_type(); + Register src = op->src()->as_register(); + Register dst = op->dst()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; + if (is_reference_type(basic_type)) + basic_type = T_OBJECT; + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL) { + Label done; + assert(src == T0 && src_pos == A0, "mismatch in calling convention"); + + // Save the arguments in case the generic arraycopy fails and we + // have to fall back to the JNI stub + __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ st_ptr(length, Address(SP, 2 * BytesPerWord)); + __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ st_ptr(src, Address(SP, 4 * BytesPerWord)); + + address copyfunc_addr = StubRoutines::generic_arraycopy(); + + // FIXME: LA + if (copyfunc_addr == NULL) { + // Take a slow path for generic arraycopy. 
+ __ b(*stub->entry()); + __ bind(*stub->continuation()); + return; + } + + // The arguments are in java calling convention so we shift them + // to C convention + assert_different_registers(A0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); + __ move(A0, j_rarg0); + assert_different_registers(A1, j_rarg2, j_rarg3, j_rarg4); + __ move(A1, j_rarg1); + assert_different_registers(A2, j_rarg3, j_rarg4); + __ move(A2, j_rarg2); + assert_different_registers(A3, j_rarg4); + __ move(A3, j_rarg3); + __ move(A4, j_rarg4); +#ifndef PRODUCT + if (PrintC1Statistics) { + __ li(SCR2, (address)&Runtime1::_generic_arraycopystub_cnt); + __ increment(SCR2, 1); + } +#endif + __ call(copyfunc_addr, relocInfo::runtime_call_type); + + __ beqz(A0, *stub->continuation()); + + // Reload values from the stack so they are where the stub + // expects them. + __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); + __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); + + // A0 is -1^K where K == partial copied count + __ nor(SCR1, A0, R0); + __ slli_w(SCR1, SCR1, 0); + // adjust length down and src/end pos up by partial copied count + __ sub_w(length, length, SCR1); + __ add_w(src_pos, src_pos, SCR1); + __ add_w(dst_pos, dst_pos, SCR1); + __ b(*stub->entry()); + + __ bind(*stub->continuation()); + return; + } + + assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), + "must be true at this point"); + + int elem_size = type2aelembytes(basic_type); + Address::ScaleFactor scale = Address::times(elem_size); + + Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); + Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); + Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); + Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); + + // test for NULL + if (flags & LIR_OpArrayCopy::src_null_check) { + __ beqz(src, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ beqz(dst, *stub->entry()); + } + + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. 
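+  // An instance klass has a non-negative layout helper (>= Klass::_lh_neutral_value),
+  // while array klasses encode a negative one, so the checks below take the slow
+  // path whenever an operand's layout helper is not negative.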
+ if (flags & LIR_OpArrayCopy::type_check) { + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ li(SCR2, Klass::_lh_neutral_value); + __ bge_far(SCR1, SCR2, *stub->entry(), true); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ ld_w(SCR1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ li(SCR2, Klass::_lh_neutral_value); + __ bge_far(SCR1, SCR2, *stub->entry(), true); + } + } + + // check if negative + if (flags & LIR_OpArrayCopy::src_pos_positive_check) { + __ blt_far(src_pos, R0, *stub->entry(), true); + } + if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { + __ blt_far(dst_pos, R0, *stub->entry(), true); + } + + if (flags & LIR_OpArrayCopy::length_positive_check) { + __ blt_far(length, R0, *stub->entry(), true); + } + + if (flags & LIR_OpArrayCopy::src_range_check) { + __ add_w(tmp, src_pos, length); + __ ld_wu(SCR1, src_length_addr); + __ blt_far(SCR1, tmp, *stub->entry(), false); + } + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ add_w(tmp, dst_pos, length); + __ ld_wu(SCR1, dst_length_addr); + __ blt_far(SCR1, tmp, *stub->entry(), false); + } + + if (flags & LIR_OpArrayCopy::type_check) { + // We don't know the array types are compatible + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays + if (UseCompressedClassPointers) { + __ ld_wu(tmp, src_klass_addr); + __ ld_wu(SCR1, dst_klass_addr); + } else { + __ ld_ptr(tmp, src_klass_addr); + __ ld_ptr(SCR1, dst_klass_addr); + } + __ bne_far(tmp, SCR1, *stub->entry()); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + Label cont, slow; + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(dst, Address(SP, 0 * wordSize)); + __ st_ptr(src, Address(SP, 1 * wordSize)); + + __ load_klass(src, src); + __ load_klass(dst, dst); + + __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(dst, Address(SP, 0 * wordSize)); + __ st_ptr(src, Address(SP, 1 * wordSize)); + __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type); + __ ld_ptr(dst, Address(SP, 0 * wordSize)); + __ ld_ptr(src, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + + __ bnez(dst, cont); + + __ bind(slow); + __ ld_ptr(dst, Address(SP, 0 * wordSize)); + __ ld_ptr(src, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + if (copyfunc_addr != NULL) { // use stub if available + // src is not a sub class of dst so we have to do a + // per-element check. + + int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Check that at least both of them object arrays. 
+ assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + } + int lh_offset = in_bytes(Klass::layout_helper_offset()); + Address klass_lh_addr(tmp, lh_offset); + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ ld_w(SCR1, klass_lh_addr); + __ li(SCR2, objArray_lh); + __ XOR(SCR1, SCR1, SCR2); + __ bnez(SCR1, *stub->entry()); + } + + // Spill because stubs can use any register they like and it's + // easier to restore just those that we care about. + __ st_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ st_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ st_ptr(length, Address(SP, 2 * BytesPerWord)); + __ st_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ st_ptr(src, Address(SP, 4 * BytesPerWord)); + + __ lea(A0, Address(src, src_pos, scale)); + __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A0, dst, dst_pos, length); + __ lea(A1, Address(dst, dst_pos, scale)); + __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A1, dst, length); + __ bstrpick_d(A2, length, 31, 0); + assert_different_registers(A2, dst); + + __ load_klass(A4, dst); + __ ld_ptr(A4, Address(A4, ObjArrayKlass::element_klass_offset())); + __ ld_w(A3, Address(A4, Klass::super_check_offset_offset())); + __ call(copyfunc_addr, relocInfo::runtime_call_type); + +#ifndef PRODUCT + if (PrintC1Statistics) { + Label failed; + __ bnez(A0, failed); + __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_cnt); + __ increment(SCR2, 1); + __ bind(failed); + } +#endif + + __ beqz(A0, *stub->continuation()); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ li(SCR2, (address)&Runtime1::_arraycopy_checkcast_attempt_cnt); + __ increment(SCR2, 1); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, A0, SCR1); + + // Restore previously spilled arguments + __ ld_ptr(dst, Address(SP, 0 * BytesPerWord)); + __ ld_ptr(dst_pos, Address(SP, 1 * BytesPerWord)); + __ ld_ptr(length, Address(SP, 2 * BytesPerWord)); + __ ld_ptr(src_pos, Address(SP, 3 * BytesPerWord)); + __ ld_ptr(src, Address(SP, 4 * BytesPerWord)); + + // return value is -1^K where K is partial copied count + __ nor(SCR1, A0, R0); + __ slli_w(SCR1, SCR1, 0); + // adjust length down and src/end pos up by partial copied count + __ sub_w(length, length, SCR1); + __ add_w(src_pos, src_pos, SCR1); + __ add_w(dst_pos, dst_pos, SCR1); + } + + __ b(*stub->entry()); + + __ bind(cont); + __ ld_ptr(dst, Address(SP, 0 * wordSize)); + __ ld_ptr(src, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + } + } + +#ifdef ASSERT + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the + // primitive case the types must match exactly with src.klass and + // dst.klass each exactly matching the default type. For the + // object array case, if no type check is needed then either the + // dst type is exactly the expected type and the src type is a + // subtype which we can't check or src is the same array as dst + // but not necessarily exactly of type default_type. 
+ Label known_ok, halt; + __ mov_metadata(tmp, default_type->constant_encoding()); + if (UseCompressedClassPointers) { + __ encode_klass_not_null(tmp); + } + + if (basic_type != T_OBJECT) { + + if (UseCompressedClassPointers) { + __ ld_wu(SCR1, dst_klass_addr); + } else { + __ ld_ptr(SCR1, dst_klass_addr); + } + __ bne(tmp, SCR1, halt); + if (UseCompressedClassPointers) { + __ ld_wu(SCR1, src_klass_addr); + } else { + __ ld_ptr(SCR1, src_klass_addr); + } + __ beq(tmp, SCR1, known_ok); + } else { + if (UseCompressedClassPointers) { + __ ld_wu(SCR1, dst_klass_addr); + } else { + __ ld_ptr(SCR1, dst_klass_addr); + } + __ beq(tmp, SCR1, known_ok); + __ beq(src, dst, known_ok); + } + __ bind(halt); + __ stop("incorrect type information in arraycopy"); + __ bind(known_ok); + } +#endif + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ li(SCR2, Runtime1::arraycopy_count_address(basic_type)); + __ increment(SCR2, 1); + } +#endif + + __ lea(A0, Address(src, src_pos, scale)); + __ addi_d(A0, A0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A0, dst, dst_pos, length); + __ lea(A1, Address(dst, dst_pos, scale)); + __ addi_d(A1, A1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(A1, length); + __ bstrpick_d(A2, length, 31, 0); + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + + CodeBlob *cb = CodeCache::find_blob(entry); + if (cb) { + __ call(entry, relocInfo::runtime_call_type); + } else { + __ call_VM_leaf(entry, 3); + } + + __ bind(*stub->continuation()); +} + +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); + if (!UseFastLocking) { + __ b(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + Register scratch = noreg; + if (UseBiasedLocking) { + scratch = op->scratch_opr()->as_register(); + } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, + "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible + int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } + // done + } else if (op->code() == lir_unlock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, + "lock_reg must point to the displaced header"); + __ unlock_object(hdr, obj, lock, *op->stub()->entry()); + } else { + Unimplemented(); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + ciMethod* callee = op->profiled_callee(); + int bci = op->profiled_bci(); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + Bytecodes::Code bc = method->java_code_at_bci(bci); + const 
bool callee_is_static = callee->is_loaded() && callee->is_static();
+  // Perform additional virtual call profiling for invokevirtual and
+  // invokeinterface bytecodes
+  if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static && // required for optimized MH invokes
+      C1ProfileVirtualCalls) {
+    assert(op->recv()->is_single_cpu(), "recv must be allocated");
+    Register recv = op->recv()->as_register();
+    assert_different_registers(mdo, recv);
+    assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+    ciKlass* known_klass = op->known_holder();
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+      // We know the type that will be seen at this call site; we can
+      // statically update the MethodData* rather than needing to do
+      // dynamic tests on the receiver type
+
+      // NOTE: we should probably put a lock around this search to
+      // avoid collisions by concurrent compilations
+      ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+      uint i;
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (known_klass->equals(receiver)) {
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ ld_ptr(SCR2, data_addr);
+          __ addi_d(SCR2, SCR2, DataLayout::counter_increment);
+          __ st_ptr(SCR2, data_addr);
+          return;
+        }
+      }
+
+      // Receiver type not found in profile data; select an empty slot
+
+      // Note that this is less efficient than it should be because it
+      // always does a write to the receiver part of the
+      // VirtualCallData rather than just the first time
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (receiver == NULL) {
+          Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)));
+          __ mov_metadata(SCR2, known_klass->constant_encoding());
+          __ lea(SCR1, recv_addr);
+          __ st_ptr(SCR2, SCR1, 0);
+          Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)));
+          __ ld_ptr(SCR2, data_addr);
+          __ addi_d(SCR2, SCR2, DataLayout::counter_increment);
+          __ st_ptr(SCR2, data_addr);
+          return;
+        }
+      }
+    } else {
+      __ load_klass(recv, recv);
+      Label update_done;
+      type_profile_helper(mdo, md, data, recv, &update_done);
+      // Receiver did not match any saved receiver and there is no empty row for it.
+      // Increment total counter to indicate polymorphic case.
+ __ ld_ptr(SCR2, counter_addr); + __ addi_d(SCR2, SCR2, DataLayout::counter_increment); + __ st_ptr(SCR2, counter_addr); + + __ bind(update_done); + } + } else { + // Static call + __ ld_ptr(SCR2, counter_addr); + __ addi_d(SCR2, SCR2, DataLayout::counter_increment); + __ st_ptr(SCR2, counter_addr); + } +} + +void LIR_Assembler::emit_delay(LIR_OpDelay*) { + Unimplemented(); +} + +void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { + __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); +} + +void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { + assert(op->crc()->is_single_cpu(), "crc must be register"); + assert(op->val()->is_single_cpu(), "byte value must be register"); + assert(op->result_opr()->is_single_cpu(), "result must be register"); + Register crc = op->crc()->as_register(); + Register val = op->val()->as_register(); + Register res = op->result_opr()->as_register(); + + assert_different_registers(val, crc, res); + __ li(res, StubRoutines::crc_table_addr()); + __ nor(crc, crc, R0); // ~crc + __ update_byte_crc32(crc, val, res); + __ nor(res, crc, R0); // ~crc +} + +void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { + COMMENT("emit_profile_type {"); + Register obj = op->obj()->as_register(); + Register tmp = op->tmp()->as_pointer_register(); + Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + assert(mdo_addr.base() != SCR1, "wrong register"); + + __ verify_oop(obj); + + if (tmp != obj) { + __ move(tmp, obj); + } + if (do_null) { + __ bnez(tmp, update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ ld_ptr(SCR2, mdo_addr); + __ ori(SCR2, SCR2, TypeEntries::null_seen); + __ st_ptr(SCR2, mdo_addr); + } + if (do_update) { +#ifndef ASSERT + __ b(next); + } +#else + __ b(next); + } + } else { + __ bnez(tmp, update); + __ stop("unexpected null obj"); +#endif + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + Label ok; + __ load_klass(tmp, tmp); + __ mov_metadata(SCR1, exact_klass->constant_encoding()); + __ XOR(SCR1, tmp, SCR1); + __ beqz(SCR1, ok); + __ stop("exact klass and actual klass differ"); + __ bind(ok); + } +#endif + if (!no_conflict) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + } else { + __ load_klass(tmp, tmp); + } + + __ ld_ptr(SCR2, mdo_addr); + __ XOR(tmp, tmp, SCR2); + assert(TypeEntries::type_klass_mask == -4, "must be"); + __ bstrpick_d(SCR1, tmp, 63, 2); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + __ beqz(SCR1, next); + + __ andi(SCR1, tmp, TypeEntries::type_unknown); + __ bnez(SCR1, next); // already unknown. Nothing to do anymore. 
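+        // A profiled type entry keeps the klass pointer in the upper bits and the
+        // null_seen/type_unknown flags in the two low bits; bstrpick_d(..., 63, 2)
+        // drops those flag bits so the comparisons in this block only look at the
+        // klass word (TypeEntries::type_klass_mask == -4).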
+ + if (TypeEntries::is_type_none(current_klass)) { + __ beqz(SCR2, none); + __ li(SCR1, (u1)TypeEntries::null_seen); + __ beq(SCR2, SCR1, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + membar_acquire(); + __ ld_ptr(SCR2, mdo_addr); + __ XOR(tmp, tmp, SCR2); + assert(TypeEntries::type_klass_mask == -4, "must be"); + __ bstrpick_d(SCR1, tmp, 63, 2); + __ beqz(SCR1, next); + } + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); + + __ ld_ptr(tmp, mdo_addr); + __ andi(SCR2, tmp, TypeEntries::type_unknown); + __ bnez(SCR2, next); // already unknown. Nothing to do anymore. + } + + // different than before. Cannot keep accurate profile. + __ ld_ptr(SCR2, mdo_addr); + __ ori(SCR2, SCR2, TypeEntries::type_unknown); + __ st_ptr(SCR2, mdo_addr); + + if (TypeEntries::is_type_none(current_klass)) { + __ b(next); + + __ bind(none); + // first time here. Set profile type. + __ st_ptr(tmp, mdo_addr); + } + } else { + // There's a single possible klass at this profile point + assert(exact_klass != NULL, "should be"); + if (TypeEntries::is_type_none(current_klass)) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + __ ld_ptr(SCR2, mdo_addr); + __ XOR(tmp, tmp, SCR2); + assert(TypeEntries::type_klass_mask == -4, "must be"); + __ bstrpick_d(SCR1, tmp, 63, 2); + __ beqz(SCR1, next); +#ifdef ASSERT + { + Label ok; + __ ld_ptr(SCR1, mdo_addr); + __ beqz(SCR1, ok); + __ li(SCR2, (u1)TypeEntries::null_seen); + __ beq(SCR1, SCR2, ok); + // may have been set by another thread + membar_acquire(); + __ mov_metadata(SCR1, exact_klass->constant_encoding()); + __ ld_ptr(SCR2, mdo_addr); + __ XOR(SCR2, SCR1, SCR2); + assert(TypeEntries::type_mask == -2, "must be"); + __ bstrpick_d(SCR2, SCR2, 63, 1); + __ beqz(SCR2, ok); + + __ stop("unexpected profiling mismatch"); + __ bind(ok); + } +#endif + // first time here. Set profile type. + __ st_ptr(tmp, mdo_addr); + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); + + __ ld_ptr(tmp, mdo_addr); + __ andi(SCR1, tmp, TypeEntries::type_unknown); + __ bnez(SCR1, next); // already unknown. Nothing to do anymore. + + __ ori(tmp, tmp, TypeEntries::type_unknown); + __ st_ptr(tmp, mdo_addr); + // FIXME: Write barrier needed here? 
+ } + } + + __ bind(next); + } + COMMENT("} emit_profile_type"); +} + +void LIR_Assembler::align_backward_branch_target() {} + +void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) { + if (left->is_single_cpu()) { + assert(dest->is_single_cpu(), "expect single result reg"); + __ sub_w(dest->as_register(), R0, left->as_register()); + } else if (left->is_double_cpu()) { + assert(dest->is_double_cpu(), "expect double result reg"); + __ sub_d(dest->as_register_lo(), R0, left->as_register_lo()); + } else if (left->is_single_fpu()) { + assert(dest->is_single_fpu(), "expect single float result reg"); + __ fneg_s(dest->as_float_reg(), left->as_float_reg()); + } else { + assert(left->is_double_fpu(), "expect double float operand reg"); + assert(dest->is_double_fpu(), "expect double float result reg"); + __ fneg_d(dest->as_double_reg(), left->as_double_reg()); + } +} + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest) { + __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); +} + +void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, + LIR_Opr tmp, CodeEmitInfo* info) { + assert(!tmp->is_valid(), "don't need temporary"); + __ call(dest, relocInfo::runtime_call_type); + if (info != NULL) { + add_call_info_here(info); + } +} + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, + CodeEmitInfo* info) { + if (dest->is_address() || src->is_address()) { + move_op(src, dest, type, lir_patch_none, info, + /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); + } else { + ShouldNotReachHere(); + } +} + +#ifdef ASSERT +// emit run-time assertion +void LIR_Assembler::emit_assert(LIR_OpAssert* op) { + assert(op->code() == lir_assert, "must be"); + Label ok; + + if (op->in_opr1()->is_valid()) { + assert(op->in_opr2()->is_valid(), "both operands must be valid"); + assert(op->in_opr1()->is_cpu_register() || op->in_opr2()->is_cpu_register(), "must be"); + Register reg1 = as_reg(op->in_opr1()); + Register reg2 = as_reg(op->in_opr2()); + switch (op->condition()) { + case lir_cond_equal: __ beq(reg1, reg2, ok); break; + case lir_cond_notEqual: __ bne(reg1, reg2, ok); break; + case lir_cond_less: __ blt(reg1, reg2, ok); break; + case lir_cond_lessEqual: __ bge(reg2, reg1, ok); break; + case lir_cond_greaterEqual: __ bge(reg1, reg2, ok); break; + case lir_cond_greater: __ blt(reg2, reg1, ok); break; + case lir_cond_belowEqual: __ bgeu(reg2, reg1, ok); break; + case lir_cond_aboveEqual: __ bgeu(reg1, reg2, ok); break; + default: ShouldNotReachHere(); + } + } else { + assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); + assert(op->condition() == lir_cond_always, "no other conditions allowed"); + } + if (op->halt()) { + const char* str = __ code_string(op->msg()); + __ stop(str); + } else { + breakpoint(); + } + __ bind(ok); +} +#endif + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +void LIR_Assembler::membar() { + COMMENT("membar"); + __ membar(Assembler::AnyAny); +} + +void LIR_Assembler::membar_acquire() { + __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore)); +} + +void LIR_Assembler::membar_release() { + __ membar(Assembler::Membar_mask_bits(Assembler::LoadStore|Assembler::StoreStore)); +} + +void LIR_Assembler::membar_loadload() { + __ membar(Assembler::LoadLoad); +} + +void LIR_Assembler::membar_storestore() { + __ membar(MacroAssembler::StoreStore); +} + +void LIR_Assembler::membar_loadstore() { + __ 
membar(MacroAssembler::LoadStore); +} + +void LIR_Assembler::membar_storeload() { + __ membar(MacroAssembler::StoreLoad); +} + +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + __ move(result_reg->as_register(), TREG); +} + +void LIR_Assembler::peephole(LIR_List *lir) { +} + +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, + LIR_Opr dest, LIR_Opr tmp_op) { + Address addr = as_Address(src->as_address_ptr()); + BasicType type = src->type(); + Register dst = as_reg(dest); + Register tmp = as_reg(tmp_op); + bool is_oop = is_reference_type(type); + + if (Assembler::is_simm(addr.disp(), 12)) { + __ addi_d(tmp, addr.base(), addr.disp()); + } else { + __ li(tmp, addr.disp()); + __ add_d(tmp, addr.base(), tmp); + } + if (addr.index() != noreg) { + if (addr.scale() > Address::times_1) + __ alsl_d(tmp, addr.index(), tmp, addr.scale() - 1); + else + __ add_d(tmp, tmp, addr.index()); + } + + switch(type) { + case T_INT: + break; + case T_LONG: + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + // unsigned int + } else { + // long + } + break; + default: + ShouldNotReachHere(); + } + + if (code == lir_xadd) { + Register inc = noreg; + if (data->is_constant()) { + inc = SCR1; + __ li(inc, as_long(data)); + } else { + inc = as_reg(data); + } + switch(type) { + case T_INT: + __ amadd_db_w(dst, inc, tmp); + break; + case T_LONG: + __ amadd_db_d(dst, inc, tmp); + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + __ amadd_db_w(dst, inc, tmp); + __ lu32i_d(dst, 0); + } else { + __ amadd_db_d(dst, inc, tmp); + } + break; + default: + ShouldNotReachHere(); + } + } else if (code == lir_xchg) { + Register obj = as_reg(data); + if (is_oop && UseCompressedOops) { + __ encode_heap_oop(SCR2, obj); + obj = SCR2; + } + switch(type) { + case T_INT: + __ amswap_db_w(dst, obj, tmp); + break; + case T_LONG: + __ amswap_db_d(dst, obj, tmp); + break; + case T_OBJECT: + case T_ARRAY: + if (UseCompressedOops) { + __ amswap_db_w(dst, obj, tmp); + __ lu32i_d(dst, 0); + } else { + __ amswap_db_d(dst, obj, tmp); + } + break; + default: + ShouldNotReachHere(); + } + if (is_oop && UseCompressedOops) { + __ decode_heap_oop(dst); + } + } else { + ShouldNotReachHere(); + } +} + +#undef __ diff --git a/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp new file mode 100644 index 00000000000..7cb15f689f5 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_LIRGenerator_loongarch_64.cpp @@ -0,0 +1,1442 @@ +/* + * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_loongarch.inline.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +// Item will be loaded into a byte register; Intel only +void LIRItem::load_byte_item() { + load_item(); +} + +void LIRItem::load_nonconstant() { + LIR_Opr r = value()->operand(); + if (r->is_constant()) { + _result = r; + } else { + load_item(); + } +} + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + +LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::a0_oop_opr; } +LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::a1_opr; } +LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::a0_opr; } +LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } + +LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case intTag: opr = FrameMap::a0_opr; break; + case objectTag: opr = FrameMap::a0_oop_opr; break; + case longTag: opr = FrameMap::long0_opr; break; + case floatTag: opr = FrameMap::fpu0_float_opr; break; + case doubleTag: opr = FrameMap::fpu0_double_opr; break; + case addressTag: + default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; + } + + assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); + return opr; +} + +LIR_Opr LIRGenerator::rlock_byte(BasicType type) { + LIR_Opr reg = new_register(T_INT); + set_vreg_flag(reg, LIRGenerator::byte_reg); + return reg; +} + +//--------- loading items into registers -------------------------------- + +bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { + if (v->type()->as_IntConstant() != NULL) { + return v->type()->as_IntConstant()->value() == 0L; + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0L; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + +bool LIRGenerator::can_inline_as_constant(Value v) const { + // FIXME: Just a guess + if (v->type()->as_IntConstant() != NULL) { + return Assembler::is_simm(v->type()->as_IntConstant()->value(), 12); + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0L; + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + 
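+// The is_simm(..., 12) test above matches the signed 12-bit immediate field of
+// LoongArch addi.w/addi.d/slti-type instructions; wider constants have to be
+// materialized into a register first.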
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } + +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} + +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + assert(base->is_register(), "must be"); + intx large_disp = disp; + + // accumulate fixed displacements + if (index->is_constant()) { + LIR_Const *constant = index->as_constant_ptr(); + if (constant->type() == T_INT) { + large_disp += index->as_jint() << shift; + } else { + assert(constant->type() == T_LONG, "should be"); + jlong c = index->as_jlong() << shift; + if ((jlong)((jint)c) == c) { + large_disp += c; + index = LIR_OprFact::illegalOpr; + } else { + LIR_Opr tmp = new_register(T_LONG); + __ move(index, tmp); + index = tmp; + // apply shift and displacement below + } + } + } + + if (index->is_register()) { + // apply the shift and accumulate the displacement + if (shift > 0) { + LIR_Opr tmp = new_pointer_register(); + __ shift_left(index, shift, tmp); + index = tmp; + } + if (large_disp != 0) { + LIR_Opr tmp = new_pointer_register(); + if (Assembler::is_simm(large_disp, 12)) { + __ add(index, LIR_OprFact::intptrConst(large_disp), tmp); + index = tmp; + } else { + __ move(LIR_OprFact::intptrConst(large_disp), tmp); + __ add(tmp, index, tmp); + index = tmp; + } + large_disp = 0; + } + } else if (large_disp != 0 && !Assembler::is_simm(large_disp, 12)) { + // index is illegal so replace it with the displacement loaded into a register + index = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(large_disp), index); + large_disp = 0; + } + + // at this point we either have base + index or base + displacement + if (large_disp == 0 && index->is_register()) { + return new LIR_Address(base, index, type); + } else { + assert(Assembler::is_simm(large_disp, 12), "must be"); + return new LIR_Address(base, large_disp, type); + } +} + +LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, BasicType type, bool needs_card_mark) { + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); + int elem_size = type2aelembytes(type); + int shift = exact_log2(elem_size); + + LIR_Address* addr; + if (index_opr->is_constant()) { + addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); + } else { + if (offset_in_bytes) { + LIR_Opr tmp = new_pointer_register(); + __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp); + array_opr = tmp; + offset_in_bytes = 0; + } + addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), offset_in_bytes, type); + } + if (needs_card_mark) { + // This store will need a precise card mark, so go ahead and + // compute the full address instead of computing it once for the + // store and again for the card mark. + LIR_Opr tmp = new_pointer_register(); + __ leal(LIR_OprFact::address(addr), tmp); + return new LIR_Address(tmp, type); + } else { + return addr; + } +} + +LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) { + LIR_Opr r; + if (type == T_LONG) { + r = LIR_OprFact::longConst(x); + if (!Assembler::is_simm(x, 12)) { + LIR_Opr tmp = new_register(type); + __ move(r, tmp); + return tmp; + } + } else if (type == T_INT) { + r = LIR_OprFact::intConst(x); + if (!Assembler::is_simm(x, 12)) { + // This is all rather nasty. We don't know whether our constant + // is required for a logical or an arithmetic operation, so we + // don't know what the range of valid values is!!
+ LIR_Opr tmp = new_register(type); + __ move(r, tmp); + return tmp; + } + } else { + ShouldNotReachHere(); + r = NULL; // unreachable + } + return r; +} + +void LIRGenerator::increment_counter(address counter, BasicType type, int step) { + LIR_Opr pointer = new_pointer_register(); + __ move(LIR_OprFact::intptrConst(counter), pointer); + LIR_Address* addr = new LIR_Address(pointer, type); + increment_counter(addr, step); +} + +void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + LIR_Opr imm = NULL; + switch(addr->type()) { + case T_INT: + imm = LIR_OprFact::intConst(step); + break; + case T_LONG: + imm = LIR_OprFact::longConst(step); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr reg = new_register(addr->type()); + __ load(addr, reg); + __ add(reg, imm, reg); + __ store(reg, addr); +} + +template <typename T> +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, + int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr reg = new_register(T_INT); + __ load(generate_address(base, disp, T_INT), reg, info); + __ cmp_branch(condition, reg, LIR_OprFact::intConst(c), T_INT, tgt); +} + +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +template <typename T> +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, + int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr reg1 = new_register(T_INT); + __ load(generate_address(base, disp, type), reg1, info); + __ cmp_branch(condition, reg, reg1, type, tgt); +} + +// Explicit instantiation for all supported types.
+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + +bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (is_power_of_2(c - 1)) { + __ shift_left(left, exact_log2(c - 1), tmp); + __ add(tmp, left, result); + return true; + } else if (is_power_of_2(c + 1)) { + __ shift_left(left, exact_log2(c + 1), tmp); + __ sub(tmp, left, result); + return true; + } else { + return false; + } +} + +void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { + BasicType type = item->type(); + __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); +} + +//---------------------------------------------------------------------- +// visitor functions +//---------------------------------------------------------------------- + +void LIRGenerator::do_StoreIndexed(StoreIndexed* x) { + assert(x->is_pinned(),""); + bool needs_range_check = x->compute_needs_range_check(); + bool use_length = x->length() != NULL; + bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT; + bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL || + !get_jobject_constant(x->value())->is_null_object() || + x->should_profile()); + + LIRItem array(x->array(), this); + LIRItem index(x->index(), this); + LIRItem value(x->value(), this); + LIRItem length(this); + + array.load_item(); + index.load_nonconstant(); + + if (use_length && needs_range_check) { + length.set_instruction(x->length()); + length.load_item(); + + } + if (needs_store_check || x->check_boolean()) { + value.load_item(); + } else { + value.load_for_store(x->elt_type()); + } + + set_no_result(x); + + // the CodeEmitInfo must be duplicated for each different + // LIR-instruction because spilling can occur anywhere between two + // instructions and so the debug information must be different + CodeEmitInfo* range_check_info = state_for(x); + CodeEmitInfo* null_check_info = NULL; + if (x->needs_null_check()) { + null_check_info = new CodeEmitInfo(range_check_info); + } + + // emit array address setup early so it schedules better + // FIXME? No harm in this on aarch64, and it might help + LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store); + + if (GenerateRangeChecks && needs_range_check) { + if (use_length) { + __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), x->elt_type(), new RangeCheckStub(range_check_info, index.result())); + } else { + array_range_check(array.result(), index.result(), null_check_info, range_check_info); + // range_check also does the null check + null_check_info = NULL; + } + } + + if (GenerateArrayStoreCheck && needs_store_check) { + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = new_register(objectType); + + CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info); + __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info, x->profiled_method(), x->profiled_bci()); + } + + if (obj_store) { + // Needs GC write barriers. 
+ pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(value.result(), array_addr, null_check_info); + // Seems to be a precise + post_barrier(LIR_OprFact::address(array_addr), value.result()); + } else { + LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info); + __ move(result, array_addr, null_check_info); + } +} + +void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.load_item(); + + set_no_result(x); + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); + // Need a scratch register for biased locking + LIR_Opr scratch = LIR_OprFact::illegalOpr; + if (UseBiasedLocking) { + scratch = new_register(T_INT); + } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { + info_for_exception = state_for(x); + } + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); + monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + +void LIRGenerator::do_MonitorExit(MonitorExit* x) { + assert(x->is_pinned(),""); + + LIRItem obj(x->obj(), this); + obj.dont_load_item(); + + LIR_Opr lock = new_register(T_INT); + LIR_Opr obj_temp = new_register(T_INT); + set_no_result(x); + monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); +} + +void LIRGenerator::do_NegateOp(NegateOp* x) { + LIRItem from(x->x(), this); + from.load_item(); + LIR_Opr result = rlock_result(x); + __ negate (from.result(), result); +} + +// for _fadd, _fmul, _fsub, _fdiv, _frem +// _dadd, _dmul, _dsub, _ddiv, _drem +void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { + // float remainder is implemented as a direct call into the runtime + LIRItem right(x->x(), this); + LIRItem left(x->y(), this); + + BasicTypeList signature(2); + if (x->op() == Bytecodes::_frem) { + signature.append(T_FLOAT); + signature.append(T_FLOAT); + } else { + signature.append(T_DOUBLE); + signature.append(T_DOUBLE); + } + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + const LIR_Opr result_reg = result_register_for(x->type()); + left.load_item_force(cc->at(1)); + right.load_item(); + + __ move(right.result(), cc->at(0)); + + address entry; + if (x->op() == Bytecodes::_frem) { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + } else { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + } + + LIR_Opr result = rlock_result(x); + __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + return; + } + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + + // Always load right hand side. 
+ right.load_item(); + + if (!left.is_register()) + left.load_item(); + + LIR_Opr reg = rlock(x); + + arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); + + set_result(x, round_item(reg)); +} + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + // missing test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + left.load_item(); + bool need_zero_check = true; + if (right.is_constant()) { + jlong c = right.get_jlong_constant(); + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) need_zero_check = false; + // do not load right if the divisor is a power-of-2 constant + if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { + right.dont_load_item(); + } else { + right.load_item(); + } + } else { + right.load_item(); + } + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new DivByZeroStub(info); + __ cmp_branch(lir_cond_equal, right.result(), LIR_OprFact::longConst(0), T_LONG, stub); + } + + rlock_result(x); + switch (x->op()) { + case Bytecodes::_lrem: + __ rem (left.result(), right.result(), x->operand()); + break; + case Bytecodes::_ldiv: + __ div (left.result(), right.result(), x->operand()); + break; + default: + ShouldNotReachHere(); + break; + } + } else { + assert(x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, + "expect lmul, ladd or lsub"); + // add, sub, mul + left.load_item(); + if (!right.is_register()) { + if (x->op() == Bytecodes::_lmul || !right.is_constant() || + (x->op() == Bytecodes::_ladd && !Assembler::is_simm(right.get_jlong_constant(), 12)) || + (x->op() == Bytecodes::_lsub && !Assembler::is_simm(-right.get_jlong_constant(), 12))) { + right.load_item(); + } else { // add, sub + assert(x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); + // don't load constants to save register + right.load_nonconstant(); + } + } + rlock_result(x); + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + } +} + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + // Test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + if (x->is_commutative() && left.is_stack() && right.is_register()) { + // swap them if left is real stack (or cached) and right is real register(not cached) + left_arg = &right; + right_arg = &left; + } + + left_arg->load_item(); + + // do not need to load right, as we can handle stack and constants + if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { + rlock_result(x); + bool need_zero_check = true; + if (right.is_constant()) { + jint c = right.get_jint_constant(); + // no need to do div-by-zero check if the divisor is a non-zero constant + if (c != 0) need_zero_check = false; + // do not load right if the divisor is a power-of-2 constant + if (c > 0 && is_power_of_2(c) && Assembler::is_uimm(c - 1, 12)) { + right_arg->dont_load_item(); + } else { + right_arg->load_item(); + } + } else { + right_arg->load_item(); + } + if (need_zero_check) { + CodeEmitInfo* info = state_for(x); + CodeStub* stub = new DivByZeroStub(info); + __ cmp_branch(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0), T_INT, 
stub); + } + + LIR_Opr ill = LIR_OprFact::illegalOpr; + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL); + } + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() && + ((x->op() == Bytecodes::_iadd && Assembler::is_simm(right.get_jint_constant(), 12)) || + (x->op() == Bytecodes::_isub && Assembler::is_simm(-right.get_jint_constant(), 12)))) { + right.load_nonconstant(); + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); + } else { + assert (x->op() == Bytecodes::_imul, "expect imul"); + if (right.is_constant()) { + jint c = right.get_jint_constant(); + if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { + right_arg->dont_load_item(); + } else { + // Cannot use constant op. + right_arg->load_item(); + } + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); + } +} + +void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { + // when an operand with use count 1 is the left operand, then it is + // likely that no move for 2-operand-LIR-form is necessary + if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { + x->swap_operands(); + } + + ValueTag tag = x->type()->tag(); + assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); + switch (tag) { + case floatTag: + case doubleTag: do_ArithmeticOp_FPU(x); return; + case longTag: do_ArithmeticOp_Long(x); return; + case intTag: do_ArithmeticOp_Int(x); return; + default: ShouldNotReachHere(); return; + } +} + +// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr +void LIRGenerator::do_ShiftOp(ShiftOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant()) { + right.dont_load_item(); + int c; + switch (x->op()) { + case Bytecodes::_ishl: + c = right.get_jint_constant() & 0x1f; + __ shift_left(left.result(), c, x->operand()); + break; + case Bytecodes::_ishr: + c = right.get_jint_constant() & 0x1f; + __ shift_right(left.result(), c, x->operand()); + break; + case Bytecodes::_iushr: + c = right.get_jint_constant() & 0x1f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + case Bytecodes::_lshl: + c = right.get_jint_constant() & 0x3f; + __ shift_left(left.result(), c, x->operand()); + break; + case Bytecodes::_lshr: + c = right.get_jint_constant() & 0x3f; + __ shift_right(left.result(), c, x->operand()); + break; + case Bytecodes::_lushr: + c = right.get_jint_constant() & 0x3f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + default: + ShouldNotReachHere(); + } + } else { + right.load_item(); + LIR_Opr tmp = new_register(T_INT); + switch (x->op()) { + case Bytecodes::_ishl: + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ shift_left(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_ishr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ shift_right(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_iushr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); + __ 
unsigned_shift_right(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_lshl: + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ shift_left(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_lshr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ shift_right(left.result(), tmp, x->operand(), tmp); + break; + case Bytecodes::_lushr: + __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); + __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); + break; + default: + ShouldNotReachHere(); + } + } +} + +// _iand, _land, _ior, _lor, _ixor, _lxor +void LIRGenerator::do_LogicOp(LogicOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant() + && ((right.type()->tag() == intTag + && Assembler::is_uimm(right.get_jint_constant(), 12)) + || (right.type()->tag() == longTag + && Assembler::is_uimm(right.get_jlong_constant(), 12)))) { + right.dont_load_item(); + } else { + right.load_item(); + } + switch (x->op()) { + case Bytecodes::_iand: + case Bytecodes::_land: + __ logical_and(left.result(), right.result(), x->operand()); break; + case Bytecodes::_ior: + case Bytecodes::_lor: + __ logical_or (left.result(), right.result(), x->operand()); break; + case Bytecodes::_ixor: + case Bytecodes::_lxor: + __ logical_xor(left.result(), right.result(), x->operand()); break; + default: Unimplemented(); + } +} + +// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg +void LIRGenerator::do_CompareOp(CompareOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + ValueTag tag = x->x()->type()->tag(); + if (tag == longTag) { + left.set_destroys_register(); + } + left.load_item(); + right.load_item(); + LIR_Opr reg = rlock_result(x); + + if (x->x()->type()->is_float_kind()) { + Bytecodes::Code code = x->op(); + __ fcmp2int(left.result(), right.result(), reg, + (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); + } else if (x->x()->type()->tag() == longTag) { + __ lcmp2int(left.result(), right.result(), reg); + } else { + Unimplemented(); + } +} + +void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { + LIRItem value(x->argument_at(0), this); + value.set_destroys_register(); + + LIR_Opr calc_result = rlock_result(x); + LIR_Opr result_reg = result_register_for(x->type()); + + CallingConvention* cc = NULL; + + if (x->id() == vmIntrinsics::_dpow) { + LIRItem value1(x->argument_at(1), this); + + value1.set_destroys_register(); + + BasicTypeList signature(2); + signature.append(T_DOUBLE); + signature.append(T_DOUBLE); + cc = frame_map()->c_calling_convention(&signature); + value.load_item_force(cc->at(0)); + value1.load_item_force(cc->at(1)); + } else { + BasicTypeList signature(1); + signature.append(T_DOUBLE); + cc = frame_map()->c_calling_convention(&signature); + value.load_item_force(cc->at(0)); + } + + switch (x->id()) { + case vmIntrinsics::_dexp: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); + break; + case vmIntrinsics::_dlog: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); + break; + case vmIntrinsics::_dlog10: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); + break; + case vmIntrinsics::_dpow: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); + break; + case 
vmIntrinsics::_dsin: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); + break; + case vmIntrinsics::_dcos: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); + break; + case vmIntrinsics::_dtan: + __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); + break; + default: ShouldNotReachHere(); + } + __ move(result_reg, calc_result); +} + +void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) { + assert(x->number_of_arguments() == 4, "wrong type"); + LIRItem obj (x->argument_at(0), this); // object + LIRItem offset(x->argument_at(1), this); // offset of field + LIRItem cmp (x->argument_at(2), this); // value to compare with field + LIRItem val (x->argument_at(3), this); // replace field with val if matches cmp + + assert(obj.type()->tag() == objectTag, "invalid type"); + + // In 64bit the type can be long, sparc doesn't have this assert + // assert(offset.type()->tag() == intTag, "invalid type"); + + assert(cmp.type()->tag() == type->tag(), "invalid type"); + assert(val.type()->tag() == type->tag(), "invalid type"); + + // get address of field + obj.load_item(); + offset.load_nonconstant(); + val.load_item(); + cmp.load_item(); + + LIR_Address* a; + if(offset.result()->is_constant()) { + jlong c = offset.result()->as_jlong(); + if ((jlong)((jint)c) == c) { + a = new LIR_Address(obj.result(), + (jint)c, + as_BasicType(type)); + } else { + LIR_Opr tmp = new_register(T_LONG); + __ move(offset.result(), tmp); + a = new LIR_Address(obj.result(), + tmp, + as_BasicType(type)); + } + } else { + a = new LIR_Address(obj.result(), + offset.result(), + LIR_Address::times_1, + 0, + as_BasicType(type)); + } + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + if (type == objectType) { // Write-barrier needed for Object fields. + // Do the pre-write barrier, if any. + pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + + LIR_Opr result = rlock_result(x); + + LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience + if (type == objectType) + __ cas_obj(addr, cmp.result(), val.result(), new_register(T_INT), new_register(T_INT), + result); + else if (type == intType) + __ cas_int(addr, cmp.result(), val.result(), ill, ill); + else if (type == longType) + __ cas_long(addr, cmp.result(), val.result(), ill, ill); + else { + ShouldNotReachHere(); + } + + __ move(FrameMap::scr1_opr, result); + + if (type == objectType) { // Write-barrier needed for Object fields. 
+ // Seems to be precise + post_barrier(addr, val.result()); + } +} + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), + "wrong type"); + if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || + x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || + x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || + x->id() == vmIntrinsics::_dlog10) { + do_LibmIntrinsic(x); + return; + } + switch (x->id()) { + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_dsqrt: + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + case vmIntrinsics::_dabs: + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + default: + ShouldNotReachHere(); + } + break; + } + default: + ShouldNotReachHere(); + } +} + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + Register j_rarg0 = RT0; + Register j_rarg1 = RA0; + Register j_rarg2 = RA1; + Register j_rarg3 = RA2; + Register j_rarg4 = RA3; + Register j_rarg5 = RA4; + + assert(x->number_of_arguments() == 5, "wrong type"); + + // Make all state_for calls early since they can emit code + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem src(x->argument_at(0), this); + LIRItem src_pos(x->argument_at(1), this); + LIRItem dst(x->argument_at(2), this); + LIRItem dst_pos(x->argument_at(3), this); + LIRItem length(x->argument_at(4), this); + + // operands for arraycopy must use fixed registers, otherwise + // LinearScan will fail allocation (because arraycopy always needs a + // call) + + // The java calling convention will give us enough registers + // so that on the stub side the args will be perfect already. + // On the other slow/special case side we call C and the arg + // positions are not similar enough to pick one as the best. 
+ // Also because the java calling convention is a "shifted" version + // of the C convention we can process the java args trivially into C + // args without worry of overwriting during the xfer + + src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); + src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); + dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); + dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); + length.load_item_force (FrameMap::as_opr(j_rarg4)); + + LIR_Opr tmp = FrameMap::as_opr(j_rarg5); + + set_no_result(x); + + int flags; + ciArrayKlass* expected_type; + arraycopy_helper(x, &flags, &expected_type); + + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), + length.result(), tmp, expected_type, flags, info); // does add_safepoint +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + assert(UseCRC32Intrinsics, "why are we here?"); + // Make all state_for calls early since they can emit code + LIR_Opr result = rlock_result(x); + int flags = 0; + switch (x->id()) { + case vmIntrinsics::_updateCRC32: { + LIRItem crc(x->argument_at(0), this); + LIRItem val(x->argument_at(1), this); + // val is destroyed by update_crc32 + val.set_destroys_register(); + crc.load_item(); + val.load_item(); + __ update_crc32(crc.result(), val.result(), result); + break; + } + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: { + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem len(x->argument_at(3), this); + buf.load_item(); + off.load_nonconstant(); + + LIR_Opr index = off.result(); + int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + offset += off.result()->as_jint(); + } + LIR_Opr base_op = buf.result(); + + if (index->is_valid()) { + LIR_Opr tmp = new_register(T_LONG); + __ convert(Bytecodes::_i2l, index, tmp); + index = tmp; + } + + if (offset) { + LIR_Opr tmp = new_pointer_register(); + __ add(base_op, LIR_OprFact::intConst(offset), tmp); + base_op = tmp; + offset = 0; + } + + LIR_Address* a = new LIR_Address(base_op, index, LIR_Address::times_1, offset, T_BYTE); + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + len.load_item_force(cc->at(2)); + + __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } +} + +// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f +// _i2b, _i2c, _i2s +void LIRGenerator::do_Convert(Convert* x) { + LIRItem value(x->value(), this); + value.load_item(); + LIR_Opr input = value.result(); + LIR_Opr result = rlock(x); + + // arguments of lir_convert + LIR_Opr conv_input = input; + LIR_Opr conv_result = result; + + switch (x->op()) { + case Bytecodes::_f2i: + case Bytecodes::_f2l: + __ convert(x->op(), conv_input, conv_result, NULL, new_register(T_FLOAT)); + break; + case Bytecodes::_d2i: + case Bytecodes::_d2l: + __ convert(x->op(), conv_input, conv_result, NULL, 
new_register(T_DOUBLE)); + break; + default: + __ convert(x->op(), conv_input, conv_result); + break; + } + + assert(result->is_virtual(), "result must be virtual register"); + set_result(x, result); +} + +void LIRGenerator::do_NewInstance(NewInstance* x) { +#ifndef PRODUCT + if (PrintNotLoaded && !x->klass()->is_loaded()) { + tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); + } +#endif + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::t0_oop_opr, + FrameMap::t1_oop_opr, + FrameMap::a4_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::a3_metadata_opr, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem length(x->length(), this); + length.load_item_force(FrameMap::s0_opr); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::t0_oop_opr; + LIR_Opr tmp2 = FrameMap::t1_oop_opr; + LIR_Opr tmp3 = FrameMap::a5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::a3_metadata_opr; + LIR_Opr len = length.result(); + BasicType elem_type = x->elt_type(); + + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + LIRItem length(x->length(), this); + // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction + // and therefore provide the state before the parameters have been consumed + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + CodeEmitInfo* info = state_for(x, x->state()); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::t0_oop_opr; + LIR_Opr tmp2 = FrameMap::t1_oop_opr; + LIR_Opr tmp3 = FrameMap::a5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::a3_metadata_opr; + + length.load_item_force(FrameMap::s0_opr); + LIR_Opr len = length.result(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(i, NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Evaluate state_for early since it may emit code. 
+ CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). + x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + CodeEmitInfo* info = state_for(x, x->state()); + + i = dims->length(); + while (i-- > 0) { + LIRItem* size = items->at(i); + size->load_item(); + + store_stack_parameter(size->result(), in_ByteSize(i*4)); + } + + LIR_Opr klass_reg = FrameMap::a0_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + + LIR_Opr rank = FrameMap::s0_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::a2_opr; + __ move(FrameMap::sp_opr, varargs); + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + LIR_Opr reg = result_register_for(x->type()); + __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, + reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { + // nothing to do for now +} + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || + (PatchALot && !x->is_incompatible_class_change_check() && + !x->is_invokespecial_receiver_check())) { + // must do this before locking the destination register as an oop register, + // and before the obj is loaded (the latter is for deoptimization) + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + + // info for exceptions + CodeEmitInfo* info_for_exception = + (x->needs_exception_state() ? 
state_for(x) : + state_for(x, x->state_before(), true /*ignore_xhandler*/)); + + CodeStub* stub; + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, + LIR_OprFact::illegalOpr, info_for_exception); + } else if (x->is_invokespecial_receiver_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new DeoptimizeStub(info_for_exception); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, + obj.result(), info_for_exception); + } + LIR_Opr reg = rlock_result(x); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ checkcast(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + + // result and test object may not be in same register + LIR_Opr reg = rlock_result(x); + CodeEmitInfo* patching_info = NULL; + if ((!x->klass()->is_loaded() || PatchALot)) { + // must do this before locking the destination register as an oop register + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded() || UseCompressedClassPointers) { + tmp3 = new_register(objectType); + } + __ instanceof(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_If(If* x) { + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + bool is_safepoint = x->is_safepoint(); + + If::Condition cond = x->cond(); + + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + + if (tag == longTag) { + // for longs, only conditions "eql", "neq", "lss", "geq" are valid; + // mirror for other conditions + if (cond == If::gtr || cond == If::leq) { + cond = Instruction::mirror(cond); + xin = &yitem; + yin = &xitem; + } + xin->set_destroys_register(); + } + xin->load_item(); + + if (tag == longTag) { + if (yin->is_constant() && yin->get_jlong_constant() == 0) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + } else if (tag == intTag) { + if (yin->is_constant() && yin->get_jint_constant() == 0) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + } else { + yin->load_item(); + } + + set_no_result(x); + + LIR_Opr left = xin->result(); + LIR_Opr right = yin->result(); + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + // increment backedge counter if needed + increment_backedge_counter(state_for(x, x->state_before()), x->profiled_bci()); + __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); + } + + // Generate branch profiling. Profiling code doesn't kill flags. 
+ profile_branch(x, cond, left, right); + move_to_phi(x->state()); + if (x->x()->type()->is_float_kind()) { + __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux(), x->usux()); + } else { + __ cmp_branch(lir_cond(cond), left, right, right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::as_pointer_opr(TREG); +} + +void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { + __ volatile_store_mem_reg(value, address, info); +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { + // 8179954: We need to make sure that the code generated for + // volatile accesses forms a sequentially-consistent set of + // operations when combined with STLR and LDAR. Without a leading + // membar it's possible for a simple Dekker test to fail if loads + // use LD;DMB but stores use STLR. This can happen if C2 compiles + // the stores in one method and C1 compiles the loads in another. + __ membar(); + __ volatile_load_mem_reg(address, result, info); +} + +void LIRGenerator::get_Object_unsafe(LIR_Opr dst, LIR_Opr src, LIR_Opr offset, + BasicType type, bool is_volatile) { + LIR_Address* addr = new LIR_Address(src, offset, type); + __ load(addr, dst); +} + +void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data, + BasicType type, bool is_volatile) { + LIR_Address* addr = new LIR_Address(src, offset, type); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + if (is_obj) { + // Do the pre-write barrier, if any. + pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + __ move(data, addr); + assert(src->is_register(), "must be register"); + // Seems to be a precise address + post_barrier(LIR_OprFact::address(addr), data); + } else { + __ move(data, addr); + } +} + +void LIRGenerator::do_UnsafeGetAndSetObject(UnsafeGetAndSetObject* x) { + BasicType type = x->basic_type(); + LIRItem src(x->object(), this); + LIRItem off(x->offset(), this); + LIRItem value(x->value(), this); + + src.load_item(); + off.load_nonconstant(); + + // We can cope with a constant increment in an xadd + if (! (x->is_add() + && value.is_constant() + && can_inline_as_constant(x->value()))) { + value.load_item(); + } + + LIR_Opr dst = rlock_result(x, type); + LIR_Opr data = value.result(); + bool is_obj = (type == T_ARRAY || type == T_OBJECT); + LIR_Opr offset = off.result(); + + if (data == dst) { + LIR_Opr tmp = new_register(data->type()); + __ move(data, tmp); + data = tmp; + } + + LIR_Address* addr; + if (offset->is_constant()) { + jlong l = offset->as_jlong(); + assert((jlong)((jint)l) == l, "offset too large for constant"); + jint c = (jint)l; + addr = new LIR_Address(src.result(), c, type); + } else { + addr = new LIR_Address(src.result(), offset, type); + } + + LIR_Opr tmp = new_register(T_INT); + LIR_Opr ptr = LIR_OprFact::illegalOpr; + + if (x->is_add()) { + __ xadd(LIR_OprFact::address(addr), data, dst, tmp); + } else { + if (is_obj) { + // Do the pre-write barrier, if any. 
+ ptr = new_pointer_register(); + __ add(src.result(), off.result(), ptr); + pre_barrier(ptr, LIR_OprFact::illegalOpr /* pre_val */, + true /* do_load */, false /* patch */, NULL); + } + __ xchg(LIR_OprFact::address(addr), data, dst, tmp); + if (is_obj) { + post_barrier(ptr, data); + } + } +} diff --git a/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp new file mode 100644 index 00000000000..f15dacafeba --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP + +inline bool LinearScan::is_processed_reg_num(int reg_num) { + return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; +} + +inline int LinearScan::num_physical_regs(BasicType type) { + return 1; +} + +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return false; +} + +inline bool LinearScan::is_caller_save(int assigned_reg) { + assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); + if (assigned_reg < pd_first_callee_saved_reg) + return true; + if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) + return true; + if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) + return true; + return false; +} + +inline void LinearScan::pd_add_temps(LIR_Op* op) {} + +// Implementation of LinearScanWalker +inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { + if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { + assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); + _first_reg = pd_first_callee_saved_reg; + _last_reg = pd_last_callee_saved_reg; + return true; + } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || + cur->type() == T_ADDRESS || cur->type() == T_METADATA) { + _first_reg = pd_first_cpu_reg; + _last_reg = pd_last_allocatable_cpu_reg; + return true; + } + return false; +} + +#endif // CPU_LOONGARCH_C1_LINEARSCAN_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp new file mode 100644 index 00000000000..219b2e3671c --- /dev/null +++ 
b/hotspot/src/cpu/loongarch/vm/c1_LinearScan_loongarch_64.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LinearScan.hpp" +#include "utilities/bitMap.inline.hpp" + +void LinearScan::allocate_fpu_stack() { + // No FPU stack on LoongArch64 +} diff --git a/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp new file mode 100644 index 00000000000..38ff4c58369 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch.hpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP + +using MacroAssembler::build_frame; +using MacroAssembler::null_check; + +// C1_MacroAssembler contains high-level macros for C1 + + private: + int _rsp_offset; // track rsp changes + // initialization + void pd_init() { _rsp_offset = 0; } + + public: + void try_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1, Register t2); + + // locking + // hdr : must be A0, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved + // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information + int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must be A0 & must point to the displaced header location, contents destroyed + void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); + + void initialize_object( + Register obj, // result: pointer to object after successful allocation + Register klass, // object klass + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB + ); + + // allocation of fixed-size objects + // (can also be used to allocate fixed-size arrays, by setting + // hdr_size correctly and storing the array length afterwards) + // obj : will contain pointer to allocated object + // t1, t2 : scratch registers - contents destroyed + // header_size: size of object header in words + // object_size: total size of object in words + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_object(Register obj, Register t1, Register t2, int header_size, + int object_size, Register klass, Label& slow_case); + + enum { + max_array_allocation_length = 0x00FFFFFF + }; + + // allocation of arrays + // obj : will contain pointer to allocated object + // len : array length in number of elements + // t : scratch register - contents destroyed + // header_size: size of object header in words + // f : element scale factor + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, + int f, Register klass, Label& slow_case); + + int rsp_offset() const { return _rsp_offset; } + void set_rsp_offset(int n) { _rsp_offset = n; } + + void invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, bool inv_a3, + bool inv_a4, bool inv_a5) PRODUCT_RETURN; + + // This platform only uses 
signal-based null checks. The Label is not needed. + void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } + + void load_parameter(int offset_in_words, Register reg); + +#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp new file mode 100644 index 00000000000..b75126fba44 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_MacroAssembler_loongarch_64.cpp @@ -0,0 +1,346 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T4 RT4 + +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + int null_check_offset = -1; + Label done; + + verify_oop(obj); + + // save object being locked into the BasicObjectLock + st_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + assert(scratch != noreg, "should have scratch register at this point"); + null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); + } else { + null_check_offset = offset(); + } + + // Load object header + ld_ptr(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + ori(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + st_ptr(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + lea(SCR2, Address(obj, hdr_offset)); + cmpxchg(Address(SCR2, 0), hdr, disp_hdr, SCR1, true, false, done); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub_d(hdr, hdr, SP); + li(SCR1, aligned_mask - os::vm_page_size()); + andr(hdr, hdr, SCR1); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + st_ptr(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case); + // done + bind(done); + return null_check_offset; +} + +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + + if (UseBiasedLocking) { + // load object + ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + biased_locking_exit(obj, hdr, done); + } + + // load displaced header + ld_ptr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); + if (!UseBiasedLocking) { + // load object + ld_ptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + lea(SCR1, Address(obj, hdr_offset)); + cmpxchg(Address(SCR1, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); + } else { + cmpxchg(Address(obj, 0), disp_hdr, hdr, SCR2, false, false, done, &slow_case); + } + // done + bind(done); +} + +// Defines obj, preserves var_size_in_bytes +void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, + int con_size_in_bytes, Register t1, Register t2, + Label& slow_case) { + if (UseTLAB) { + tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); + } else { + eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); + } +} + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, + Register t1, Register t2) { + assert_different_registers(obj, klass, len); + if (UseBiasedLocking && !len->is_valid()) { + assert_different_registers(obj, klass, len, t1, t2); + ld_ptr(t1, Address(klass, Klass::prototype_header_offset())); + } else { + // This assumes that all prototype bits fit in an int32_t + li(t1, (int32_t)(intptr_t)markOopDesc::prototype()); + } + st_ptr(t1, Address(obj, 
oopDesc::mark_offset_in_bytes())); + + if (UseCompressedClassPointers) { // Take care not to kill klass + encode_klass_not_null(t1, klass); + st_w(t1, Address(obj, oopDesc::klass_offset_in_bytes())); + } else { + st_ptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + } + + if (len->is_valid()) { + st_w(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); + } else if (UseCompressedClassPointers) { + store_klass_gap(obj, R0); + } +} + +// preserves obj, destroys len_in_bytes +// +// Scratch registers: t1 = T0, t2 = T1 +// +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, + int hdr_size_in_bytes, Register t1, Register t2) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + assert(t1 == T0 && t2 == T1, "must be"); + Label done; + + // len_in_bytes is positive and ptr sized + addi_d(len_in_bytes, len_in_bytes, -hdr_size_in_bytes); + beqz(len_in_bytes, done); + + // zero_words() takes ptr in t1 and count in bytes in t2 + lea(t1, Address(obj, hdr_size_in_bytes)); + addi_d(t2, len_in_bytes, -BytesPerWord); + + Label loop; + bind(loop); + stx_d(R0, t1, t2); + addi_d(t2, t2, -BytesPerWord); + bge(t2, R0, loop); + + bind(done); +} + +void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, + int object_size, Register klass, Label& slow_case) { + assert_different_registers(obj, t1, t2); + assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); + + try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); + + initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); +} + +// Scratch registers: t1 = T0, t2 = T1 +void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, + int con_size_in_bytes, Register t1, Register t2, + bool is_tlab_allocated) { + assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, + "con_size_in_bytes is not multiple of alignment"); + const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; + + initialize_header(obj, klass, noreg, t1, t2); + + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { + // clear rest of allocated space + const Register index = t2; + if (var_size_in_bytes != noreg) { + move(index, var_size_in_bytes); + initialize_body(obj, index, hdr_size_in_bytes, t1, t2); + } else if (con_size_in_bytes > hdr_size_in_bytes) { + con_size_in_bytes -= hdr_size_in_bytes; + lea(t1, Address(obj, hdr_size_in_bytes)); + Label loop; + li(SCR1, con_size_in_bytes - BytesPerWord); + bind(loop); + stx_d(R0, t1, SCR1); + addi_d(SCR1, SCR1, -BytesPerWord); + bge(SCR1, R0, loop); + } + } + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == A0, "must be"); + call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, + int header_size, int f, Register klass, Label& slow_case) { + assert_different_registers(obj, len, t1, t2, klass); + + // determine alignment mask + assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); + + // check for negative or excessive length + li(SCR1, (int32_t)max_array_allocation_length); + bge_far(len, SCR1, slow_case, false); + + const Register arr_size = t2; // okay to be the same + // align object end + li(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); + slli_w(SCR1, len, f); + 
add_d(arr_size, arr_size, SCR1); + bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); + + try_allocate(obj, arr_size, 0, t1, t2, slow_case); + + initialize_header(obj, klass, len, t1, t2); + + // clear rest of allocated space + initialize_body(obj, arr_size, header_size * BytesPerWord, t1, t2); + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == A0, "must be"); + call(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id), relocInfo::runtime_call_type); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { + assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); + // Make sure there is enough stack space for this method's activation. + // Note that we do this before creating a frame. + generate_stack_overflow_check(bang_size_in_bytes); + MacroAssembler::build_frame(framesize); +} + +void C1_MacroAssembler::remove_frame(int framesize) { + MacroAssembler::remove_frame(framesize); +} + +void C1_MacroAssembler::verified_entry() { + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a b, bl, nop, break. + // Make it a NOP. + nop(); +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { + // rbp, + 0: link + // + 1: return address + // + 2: argument with offset 0 + // + 3: argument with offset 1 + // + 4: ... + + ld_ptr(reg, Address(FP, (offset_in_words + 2) * BytesPerWord)); +} + +#ifndef PRODUCT +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) return; + verify_oop_addr(Address(SP, stack_offset), "oop"); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + if (!VerifyOops) return; + Label not_null; + bnez(r, not_null); + stop("non-null oop required"); + bind(not_null); + verify_oop(r); +} + +void C1_MacroAssembler::invalidate_registers(bool inv_a0, bool inv_s0, bool inv_a2, + bool inv_a3, bool inv_a4, bool inv_a5) { +#ifdef ASSERT + static int nn; + if (inv_a0) li(A0, 0xDEAD); + if (inv_s0) li(S0, 0xDEAD); + if (inv_a2) li(A2, nn++); + if (inv_a3) li(A3, 0xDEAD); + if (inv_a4) li(A4, 0xDEAD); + if (inv_a5) li(A5, 0xDEAD); +#endif +} +#endif // ifndef PRODUCT diff --git a/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp new file mode 100644 index 00000000000..a750dca323b --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_Runtime1_loongarch_64.cpp @@ -0,0 +1,1252 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Defs.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" +#include "compiler/oopMap.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/universe.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "register_loongarch.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_loongarch.inline.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#endif + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T5 RT5 +#define T6 RT6 +#define T8 RT8 + +// Implementation of StubAssembler + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { + // setup registers + assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, + "registers must be different"); + assert(oop_result1 != TREG && metadata_result != TREG, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + bool align_stack = false; + + move(A0, TREG); + set_num_rt_args(0); // Nothing on stack + + Label retaddr; + set_last_Java_frame(SP, FP, retaddr); + + // do the call + call(entry, relocInfo::runtime_call_type); + bind(retaddr); + int call_offset = offset(); + // verify callee-saved register +#ifdef ASSERT + { Label L; + get_thread(SCR1); + beq(TREG, SCR1, L); + stop("StubAssembler::call_RT: TREG not callee saved?"); + bind(L); + } +#endif + reset_last_Java_frame(true); + + // check for pending exceptions + { Label L; + // check for pending exceptions (java_thread is set upon return) + ld_ptr(SCR1, Address(TREG, in_bytes(Thread::pending_exception_offset()))); + beqz(SCR1, L); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared + if (oop_result1->is_valid()) { + st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { + st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); + } + if (frame_size() == no_frame_size) { + leave(); + jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + } else if (_stub_id == Runtime1::forward_exception_id) { + should_not_reach_here(); + } else { + jmp(Runtime1::entry_for(Runtime1::forward_exception_id), relocInfo::runtime_call_type); + } + bind(L); + } + // get oop results if there are any and reset the values in the thread + if (oop_result1->is_valid()) { + get_vm_result(oop_result1, TREG); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, TREG); + } + return call_offset; +} + +int StubAssembler::call_RT(Register oop_result1, 
Register metadata_result, + address entry, Register arg1) { + move(A1, arg1); + return call_RT(oop_result1, metadata_result, entry, 1); +} + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, + address entry, Register arg1, Register arg2) { + if (A1 == arg2) { + if (A2 == arg1) { + move(SCR1, arg1); + move(arg1, arg2); + move(arg2, SCR1); + } else { + move(A2, arg2); + move(A1, arg1); + } + } else { + move(A1, arg1); + move(A2, arg2); + } + return call_RT(oop_result1, metadata_result, entry, 2); +} + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, + address entry, Register arg1, Register arg2, Register arg3) { + // if there is any conflict use the stack + if (arg1 == A2 || arg1 == A3 || + arg2 == A1 || arg2 == A3 || + arg3 == A1 || arg3 == A2) { + addi_d(SP, SP, -4 * wordSize); + st_ptr(arg1, Address(SP, 0 * wordSize)); + st_ptr(arg2, Address(SP, 1 * wordSize)); + st_ptr(arg3, Address(SP, 2 * wordSize)); + ld_ptr(arg1, Address(SP, 0 * wordSize)); + ld_ptr(arg2, Address(SP, 1 * wordSize)); + ld_ptr(arg3, Address(SP, 2 * wordSize)); + addi_d(SP, SP, 4 * wordSize); + } else { + move(A1, arg1); + move(A2, arg2); + move(A3, arg3); + } + return call_RT(oop_result1, metadata_result, entry, 3); +} + +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; + + public: + StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); +};; + +#define __ _sasm-> + +StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { + _sasm = sasm; + __ set_info(name, must_gc_arguments); + __ enter(); +} + +// load parameters that were stored with LIR_Assembler::store_parameter +// Note: offsets for store_parameter and load_argument must match +void StubFrame::load_argument(int offset_in_words, Register reg) { + __ load_parameter(offset_in_words, reg); +} + +StubFrame::~StubFrame() { + __ leave(); + __ jr(RA); +} + +#undef __ + +// Implementation of Runtime1 + +#define __ sasm-> + +const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2; + +// Stack layout for saving/restoring all the registers needed during a runtime +// call (this includes deoptimization) +// Note: note that users of this frame may well have arguments to some runtime +// while these values are on the stack. These positions neglect those arguments +// but the code in save_live_registers will take the argument count into +// account. +// + +enum reg_save_layout { + reg_save_frame_size = 32 /* float */ + 30 /* integer, except zr, tp */ +}; + +// Save off registers which might be killed by calls into the runtime. +// Tries to smart of about FP registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. +// In all other cases it should be sufficient to simply save their +// current value. 
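+// Note: the following layout summary is inferred from save_live_registers()
+// and restore_live_registers() below, and is descriptive only. The save area
+// is (32 - 4 + 32) words: words 0..31 hold f0..f31 (when FPU state is saved)
+// and words 32..59 hold the integer registers r4..r31; zr, ra, tp and sp are
+// not saved here. cpu_reg_save_offsets / fpu_reg_save_offsets record the
+// matching positions in VMReg stack-slot units for the oop maps built here.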
+ +static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; +static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; +static int reg_save_size_in_words; +static int frame_size_in_bytes = -1; + +static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { + int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; + sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); + int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + + for (int i = A0->encoding(); i <= T8->encoding(); i++) { + Register r = as_Register(i); + if (i != SCR1->encoding() && i != SCR2->encoding()) { + int sp_offset = cpu_reg_save_offsets[i]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); + } + } + + if (save_fpu_registers) { + for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { + FloatRegister r = as_FloatRegister(i); + int sp_offset = fpu_reg_save_offsets[i]; + oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); + } + } + + return oop_map; +} + +static OopMap* save_live_registers(StubAssembler* sasm, + bool save_fpu_registers = true) { + __ block_comment("save_live_registers"); + + // integer registers except zr & ra & tp & sp + __ addi_d(SP, SP, -(32 - 4 + 32) * wordSize); + + for (int i = 4; i < 32; i++) + __ st_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); + + if (save_fpu_registers) { + for (int i = 0; i < 32; i++) + __ fst_d(as_FloatRegister(i), Address(SP, i * wordSize)); + } + + return generate_oop_map(sasm, save_fpu_registers); +} + +static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + for (int i = 0; i < 32; i ++) + __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); + } + + for (int i = 4; i < 32; i++) + __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); + + __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); +} + +static void restore_live_registers_except_a0(StubAssembler* sasm, bool restore_fpu_registers = true) { + if (restore_fpu_registers) { + for (int i = 0; i < 32; i ++) + __ fld_d(as_FloatRegister(i), Address(SP, i * wordSize)); + } + + for (int i = 5; i < 32; i++) + __ ld_ptr(as_Register(i), Address(SP, (32 + i - 4) * wordSize)); + + __ addi_d(SP, SP, (32 - 4 + 32) * wordSize); +} + +void Runtime1::initialize_pd() { + int sp_offset = 0; + int i; + + // all float registers are saved explicitly + assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); + for (i = 0; i < FrameMap::nof_fpu_regs; i++) { + fpu_reg_save_offsets[i] = sp_offset; + sp_offset += 2; // SP offsets are in halfwords + } + + for (i = 4; i < FrameMap::nof_cpu_regs; i++) { + Register r = as_Register(i); + cpu_reg_save_offsets[i] = sp_offset; + sp_offset += 2; // SP offsets are in halfwords + } +} + +// target: the entry point of the method that creates and posts the exception oop +// has_argument: true if the exception needs arguments (passed in SCR1 and SCR2) + +OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, + bool has_argument) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + int call_offset; + if (!has_argument) { + call_offset = __ call_RT(noreg, noreg, target); + } else { + __ move(A1, SCR1); + __ move(A2, SCR2); + call_offset = __ call_RT(noreg, noreg, target); + } + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + 
return oop_maps; +} + +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + __ block_comment("generate_handle_exception"); + + // incoming parameters + const Register exception_oop = A0; + const Register exception_pc = A1; + // other registers used in this stub + + // Save registers, if required. + OopMapSet* oop_maps = new OopMapSet(); + OopMap* oop_map = NULL; + switch (id) { + case forward_exception_id: + // We're handling an exception in the context of a compiled frame. + // The registers have been saved in the standard places. Perform + // an exception lookup in the caller and dispatch to the handler + // if found. Otherwise unwind and dispatch to the callers + // exception handler. + oop_map = generate_oop_map(sasm, 1 /*thread*/); + + // load and clear pending exception oop into A0 + __ ld_ptr(exception_oop, Address(TREG, Thread::pending_exception_offset())); + __ st_ptr(R0, Address(TREG, Thread::pending_exception_offset())); + + // load issuing PC (the return address for this stub) into A1 + __ ld_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); + + // make sure that the vm_results are cleared (may be unnecessary) + __ st_ptr(R0, Address(TREG, JavaThread::vm_result_offset())); + __ st_ptr(R0, Address(TREG, JavaThread::vm_result_2_offset())); + break; + case handle_exception_nofpu_id: + case handle_exception_id: + // At this point all registers MAY be live. + oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: { + // At this point all registers except exception oop (A0) and + // exception pc (RA) are dead. + const int frame_size = 2 /*fp, return address*/; + oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); + sasm->set_frame_size(frame_size); + break; + } + default: ShouldNotReachHere(); + } + + // verify that only A0 and A1 are valid at this time + __ invalidate_registers(false, true, true, true, true, true); + // verify that A0 contains a valid exception + __ verify_not_null_oop(exception_oop); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are + // empty before writing to them + Label oop_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); + __ beqz(SCR1, oop_empty); + __ stop("exception oop already set"); + __ bind(oop_empty); + + Label pc_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); + __ beqz(SCR1, pc_empty); + __ stop("exception pc already set"); + __ bind(pc_empty); +#endif + + // save exception oop and issuing pc into JavaThread + // (exception handler will load it from here) + __ st_ptr(exception_oop, Address(TREG, JavaThread::exception_oop_offset())); + __ st_ptr(exception_pc, Address(TREG, JavaThread::exception_pc_offset())); + + // patch throwing pc into return address (has bci & oop map) + __ st_ptr(exception_pc, Address(FP, 1 * BytesPerWord)); + + // compute the exception handler. + // the exception oop and the throwing pc are read from the fields in JavaThread + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); + oop_maps->add_gc_map(call_offset, oop_map); + + // A0: handler address + // will be the deopt blob if nmethod was deoptimized while we looked up + // handler regardless of whether handler existed in the nmethod. 
+ + // only A0 is valid at this time, all other registers have been destroyed by the runtime call + __ invalidate_registers(false, true, true, true, true, true); + + // patch the return address, this stub will directly return to the exception handler + __ st_ptr(A0, Address(FP, 1 * BytesPerWord)); + + switch (id) { + case forward_exception_id: + case handle_exception_nofpu_id: + case handle_exception_id: + // Restore the registers that were saved at the beginning. + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: + break; + default: ShouldNotReachHere(); + } + + return oop_maps; +} + +void Runtime1::generate_unwind_exception(StubAssembler *sasm) { + // incoming parameters + const Register exception_oop = A0; + // callee-saved copy of exception_oop during runtime call + const Register exception_oop_callee_saved = S0; + // other registers used in this stub + const Register exception_pc = A1; + const Register handler_addr = A3; + + // verify that only A0, is valid at this time + __ invalidate_registers(false, true, true, true, true, true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_oop_offset())); + __ beqz(SCR1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); + __ beqz(SCR1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Save our return address because + // exception_handler_for_return_address will destroy it. We also + // save exception_oop + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(RA, Address(SP, 0 * wordSize)); + __ st_ptr(exception_oop, Address(SP, 1 * wordSize)); + + // search the exception handler address of the caller (using the return address) + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), TREG, RA); + // V0: exception handler address of the caller + + // Only V0 is valid at this time; all other registers have been + // destroyed by the call. + __ invalidate_registers(false, true, true, true, false, true); + + // move result of call into correct register + __ move(handler_addr, A0); + + // get throwing pc (= return address). + // RA has been destroyed by the call + __ ld_ptr(RA, Address(SP, 0 * wordSize)); + __ ld_ptr(exception_oop, Address(SP, 1 * wordSize)); + __ addi_d(SP, SP, 2 * wordSize); + __ move(A1, RA); + + __ verify_not_null_oop(exception_oop); + + // continue at exception handler (return address removed) + // note: do *not* remove arguments when unwinding the + // activation since the caller assumes having + // all arguments on the stack when entering the + // runtime to determine the exception handler + // (GC happens at call site with arguments!) + // A0: exception oop + // A1: throwing pc + // A3: exception handler + __ jr(handler_addr); +} + +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + // use the maximum number of runtime-arguments here because it is difficult to + // distinguish each RT-Call. + // Note: This number affects also the RT-Call in generate_handle_exception because + // the oop-map is shared for all calls. 
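+  // Summary of the code below (descriptive only): all live registers are
+  // saved, the patching runtime entry is called with the current thread as
+  // its only argument, and the flag returned in A0 is tested. A non-zero
+  // result means the nmethod was deoptimized while patching, so the registers
+  // are restored, the frame is removed and control continues at the deopt
+  // blob's re-execution entry; otherwise the registers are restored and the
+  // stub simply returns so the patched site is executed normally.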
+ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + OopMap* oop_map = save_live_registers(sasm); + + __ move(A0, TREG); + Label retaddr; + __ set_last_Java_frame(SP, FP, retaddr); + // do the call + __ call(target, relocInfo::runtime_call_type); + __ bind(retaddr); + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(__ offset(), oop_map); + // verify callee-saved register +#ifdef ASSERT + { Label L; + __ get_thread(SCR1); + __ beq(TREG, SCR1, L); + __ stop("StubAssembler::call_RT: rthread not callee saved?"); + __ bind(L); + } +#endif + + __ reset_last_Java_frame(true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ld_ptr(SCR1, Address(TREG, Thread::pending_exception_offset())); + __ beqz(SCR1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ld_ptr(SCR1, Address(TREG, JavaThread::exception_pc_offset())); + __ beqz(SCR1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Runtime will return true if the nmethod has been deoptimized, this is the + // expected scenario and anything else is an error. Note that we maintain a + // check on the result purely as a defensive measure. + Label no_deopt; + __ beqz(A0, no_deopt); // Have we deoptimized? + + // Perform a re-execute. The proper return address is already on the stack, + // we just need to restore registers, pop all of our frame but the return + // address and jump to the deopt blob. + restore_live_registers(sasm); + __ leave(); + __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); + + __ bind(no_deopt); + restore_live_registers(sasm); + __ leave(); + __ jr(RA); + + return oop_maps; +} + +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + // for better readability + const bool must_gc_arguments = true; + const bool dont_gc_arguments = false; + + // default value; overwritten for some optimized stubs that are called + // from methods that do not use the fpu + bool save_fpu_registers = true; + + // stub code & info for the different stubs + OopMapSet* oop_maps = NULL; + OopMap* oop_map = NULL; + switch (id) { + { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + __ leave(); + __ jr(RA); + } + break; + + case throw_div0_exception_id: + { + StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: + { + StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + Register klass = A3; // Incoming + Register obj = A0; // Result + + if (id == new_instance_id) { + __ set_info("new_instance", dont_gc_arguments); + } else if (id == fast_new_instance_id) { + __ set_info("fast new_instance", dont_gc_arguments); + } else { + assert(id == fast_new_instance_init_check_id, "bad StubID"); + __ set_info("fast new_instance init check", dont_gc_arguments); + } + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. 
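+        // Summary of the inline fast path below (descriptive only): it is
+        // taken only for the fast_new_instance variants when TLABs are off
+        // and the heap supports inline contiguous allocation. The klass init
+        // state is checked first (init-check variant only), the instance size
+        // is read from Klass::layout_helper, eden_allocate() reserves the
+        // space and initialize_object() fills in header and body; any failure
+        // branches to slow_path, which falls through to the call_RT
+        // new_instance slow case that follows.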
+ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && + !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Label slow_path; + Register obj_size = S0; + Register t1 = T0; + Register t2 = T1; + assert_different_registers(klass, obj, obj_size, t1, t2); + + __ addi_d(SP, SP, -2 * wordSize); + __ st_ptr(S0, Address(SP, 0)); + + if (id == fast_new_instance_init_check_id) { + // make sure the klass is initialized + __ ld_bu(SCR1, Address(klass, InstanceKlass::init_state_offset())); + __ li(SCR2, InstanceKlass::fully_initialized); + __ bne_far(SCR1, SCR2, slow_path); + } + +#ifdef ASSERT + // assert object can be fast path allocated + { + Label ok, not_ok; + __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); + __ bge(R0, obj_size, not_ok); // make sure it's an instance (LH > 0) + __ andi(SCR1, obj_size, Klass::_lh_instance_slow_path_bit); + __ beqz(SCR1, ok); + __ bind(not_ok); + __ stop("assert(can be fast path allocated)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // get the instance size (size is postive so movl is fine for 64bit) + __ ld_w(obj_size, Address(klass, Klass::layout_helper_offset())); + + __ eden_allocate(obj, obj_size, 0, t1, slow_path); + + __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); + __ verify_oop(obj); + __ ld_ptr(S0, Address(SP, 0)); + __ addi_d(SP, SP, 2 * wordSize); + __ jr(RA); + + __ bind(slow_path); + __ ld_ptr(S0, Address(SP, 0)); + __ addi_d(SP, SP, 2 * wordSize); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_a0(sasm); + __ verify_oop(obj); + __ leave(); + __ jr(RA); + + // A0,: new instance + } + + break; + + case counter_overflow_id: + { + Register bci = A0, method = A1; + __ enter(); + OopMap* map = save_live_registers(sasm); + // Retrieve bci + __ ld_w(bci, Address(FP, 2 * BytesPerWord)); + // And a pointer to the Method* + __ ld_d(method, Address(FP, 3 * BytesPerWord)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + __ jr(RA); + } + break; + + case new_type_array_id: + case new_object_array_id: + { + Register length = S0; // Incoming + Register klass = A3; // Incoming + Register obj = A0; // Result + + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + +#ifdef ASSERT + // assert object type is really an array of the proper kind + { + Label ok; + Register t0 = obj; + __ ld_w(t0, Address(klass, Klass::layout_helper_offset())); + __ srai_w(t0, t0, Klass::_lh_array_tag_shift); + int tag = ((id == new_type_array_id) + ? Klass::_lh_array_tag_type_value + : Klass::_lh_array_tag_obj_value); + __ li(SCR1, tag); + __ beq(t0, SCR1, ok); + __ stop("assert(is an array klass)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. 
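+        // Summary of the inline fast path below (descriptive only): the
+        // length is checked against max_array_allocation_length, the
+        // allocation size is computed from Klass::layout_helper as
+        // header_size + (length << element-size shift) rounded up to the
+        // object alignment, eden_allocate() reserves the space and
+        // initialize_header()/initialize_body() fill it in; any failure
+        // branches to slow_path and the call_RT new_type_array /
+        // new_object_array slow case that follows.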
+ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Register arr_size = A5; + Register t1 = T0; + Register t2 = T1; + Label slow_path; + assert_different_registers(length, klass, obj, arr_size, t1, t2); + + // check that array length is small enough for fast path. + __ li(SCR1, C1_MacroAssembler::max_array_allocation_length); + __ blt_far(SCR1, length, slow_path, false); + + // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) + // since size is positive ldrw does right thing on 64bit + __ ld_w(t1, Address(klass, Klass::layout_helper_offset())); + // since size is positive movw does right thing on 64bit + __ move(arr_size, length); + __ sll_w(arr_size, length, t1); + __ bstrpick_d(t1, t1, Klass::_lh_header_size_shift + + exact_log2(Klass::_lh_header_size_mask + 1) - 1, + Klass::_lh_header_size_shift); + __ add_d(arr_size, arr_size, t1); + __ addi_d(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up + __ bstrins_d(arr_size, R0, exact_log2(MinObjAlignmentInBytesMask + 1) - 1, 0); + + __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size + + __ initialize_header(obj, klass, length, t1, t2); + __ ld_bu(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); + __ andi(t1, t1, Klass::_lh_header_size_mask); + __ sub_d(arr_size, arr_size, t1); // body length + __ add_d(t1, t1, obj); // body start + __ initialize_body(t1, arr_size, 0, t1, t2); + __ membar(Assembler::StoreStore); + __ verify_oop(obj); + + __ jr(RA); + + __ bind(slow_path); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset; + if (id == new_type_array_id) { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); + } else { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_a0(sasm); + + __ verify_oop(obj); + __ leave(); + __ jr(RA); + + // A0: new array + } + break; + + case new_multi_array_id: + { + StubFrame f(sasm, "new_multi_array", dont_gc_arguments); + // A0,: klass + // S0,: rank + // A2: address of 1st dimension + OopMap* map = save_live_registers(sasm); + __ move(A1, A0); + __ move(A3, A2); + __ move(A2, S0); + int call_offset = __ call_RT(A0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), A1, A2, A3); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_a0(sasm); + + // A0,: new multi array + __ verify_oop(A0); + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // This is called via call_runtime so the arguments + // will be place in C abi locations + + __ verify_oop(A0); + + // load the klass and check the has finalizer flag + Label register_finalizer; + Register t = A5; + __ load_klass(t, A0); + __ ld_w(t, Address(t, Klass::access_flags_offset())); + __ li(SCR1, JVM_ACC_HAS_FINALIZER); + __ andr(SCR1, t, SCR1); + __ bnez(SCR1, register_finalizer); + __ jr(RA); + + __ bind(register_finalizer); + __ enter(); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), A0); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, 
oop_map); + + // Now restore all the live registers + restore_live_registers(sasm); + + __ leave(); + __ jr(RA); + } + break; + + case throw_class_cast_exception_id: + { + StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { + StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + + case slow_subtype_check_id: + { + // Typical calling sequence: + // __ push(klass_RInfo); // object klass or other subclass + // __ push(sup_k_RInfo); // array element klass or other superclass + // __ bl(slow_subtype_check); + // Note that the subclass is pushed first, and is therefore deepest. + enum layout { + a0_off, a0_off_hi, + a2_off, a2_off_hi, + a4_off, a4_off_hi, + a5_off, a5_off_hi, + sup_k_off, sup_k_off_hi, + klass_off, klass_off_hi, + framesize, + result_off = sup_k_off + }; + + __ set_info("slow_subtype_check", dont_gc_arguments); + __ addi_d(SP, SP, -4 * wordSize); + __ st_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); + __ st_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); + __ st_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); + __ st_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); + + // This is called by pushing args and not with C abi + __ ld_ptr(A4, Address(SP, klass_off * VMRegImpl::stack_slot_size)); // subclass + __ ld_ptr(A0, Address(SP, sup_k_off * VMRegImpl::stack_slot_size)); // superclass + + Label miss; + __ check_klass_subtype_slow_path(A4, A0, A2, A5, NULL, &miss); + + // fallthrough on success: + __ li(SCR1, 1); + __ st_ptr(SCR1, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result + __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); + __ addi_d(SP, SP, 4 * wordSize); + __ jr(RA); + + __ bind(miss); + __ st_ptr(R0, Address(SP, result_off * VMRegImpl::stack_slot_size)); // result + __ ld_ptr(A0, Address(SP, a0_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A2, Address(SP, a2_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A4, Address(SP, a4_off * VMRegImpl::stack_slot_size)); + __ ld_ptr(A5, Address(SP, a5_off * VMRegImpl::stack_slot_size)); + __ addi_d(SP, SP, 4 * wordSize); + __ jr(RA); + } + break; + + case monitorenter_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorenter_id: + { + StubFrame f(sasm, "monitorenter", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + f.load_argument(1, A0); // A0,: object + f.load_argument(0, A1); // A1,: lock address + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), A0, A1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case monitorexit_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorexit_id: + { + StubFrame f(sasm, "monitorexit", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + 
f.load_argument(0, A0); // A0,: lock address + + // note: really a leaf routine but must setup last java sp + // => use call_RT for now (speed can be improved by + // doing last java sp setup manually) + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), A0); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case deoptimize_id: + { + StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + f.load_argument(0, A1); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), A1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ leave(); + __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); + } + break; + + case throw_range_check_failed_id: + { + StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case unwind_exception_id: + { + __ set_info("unwind_exception", dont_gc_arguments); + // note: no stubframe since we are about to leave the current + // activation and we are calling a leaf VM function only. + generate_unwind_exception(sasm); + } + break; + + case access_field_patching_id: + { + StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { + StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_mirror_patching_id: + { + StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case load_appendix_patching_id: + { + StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; + + case handle_exception_nofpu_id: + case handle_exception_id: + { + StubFrame f(sasm, "handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case handle_exception_from_callee_id: + { + StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case throw_index_exception_id: + { + StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { + StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + +#if INCLUDE_ALL_GCS + + case g1_pre_barrier_slow_id: + { + StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments); + // arg0 : previous value of memory + + BarrierSet* bs = Universe::heap()->barrier_set(); + 
if (bs->kind() != BarrierSet::G1SATBCTLogging) { + __ li(A0, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); + __ should_not_reach_here(); + break; + } + + const Register pre_val = A0; + const Register thread = TREG; + const Register tmp = SCR2; + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + + Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + Label done; + Label runtime; + + // Can we store original value in the thread's buffer? + __ ld_ptr(tmp, queue_index); + __ beqz(tmp, runtime); + + __ addi_d(tmp, tmp, -wordSize); + __ st_ptr(tmp, queue_index); + __ ld_ptr(SCR1, buffer); + __ add_d(tmp, tmp, SCR1); + f.load_argument(0, SCR1); + __ st_ptr(SCR1, Address(tmp, 0)); + __ b(done); + + __ bind(runtime); + __ pushad(); + f.load_argument(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); + __ popad(); + __ bind(done); + } + break; + case g1_post_barrier_slow_id: + { + StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments); + + // arg0: store_address + Address store_addr(FP, 2*BytesPerWord); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. + // Must check to see if card is already dirty + + const Register thread = TREG; + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + const Register card_offset = SCR2; + // RA is free here, so we can use it to hold the byte_map_base. + const Register byte_map_base = RA; + + assert_different_registers(card_offset, byte_map_base, SCR1); + + f.load_argument(0, card_offset); + __ srli_d(card_offset, card_offset, CardTableModRefBS::card_shift); + __ load_byte_map_base(byte_map_base); + __ ldx_bu(SCR1, byte_map_base, card_offset); + __ addi_d(SCR1, SCR1, -(int)G1SATBCardTableModRefBS::g1_young_card_val()); + __ beqz(SCR1, done); + + assert((int)CardTableModRefBS::dirty_card_val() == 0, "must be 0"); + + __ membar(Assembler::StoreLoad); + __ ldx_bu(SCR1, byte_map_base, card_offset); + __ beqz(SCR1, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. 
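+        // Summary of the sequence below (descriptive only): the card is
+        // dirtied by storing dirty_card_val() (asserted to be 0 above), the
+        // card offset is turned back into a card address, and that address is
+        // enqueued on the thread's dirty card queue; if the queue index is
+        // already 0 the buffer is full and the runtime g1_wb_post entry is
+        // called instead.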
+ __ stx_b(R0, byte_map_base, card_offset); + + // Convert card offset into an address in card_addr + Register card_addr = card_offset; + __ add_d(card_addr, byte_map_base, card_addr); + + __ ld_ptr(SCR1, queue_index); + __ beqz(SCR1, runtime); + __ addi_d(SCR1, SCR1, -wordSize); + __ st_ptr(SCR1, queue_index); + + // Reuse RA to hold buffer_addr + const Register buffer_addr = RA; + + __ ld_ptr(buffer_addr, buffer); + __ stx_d(card_addr, buffer_addr, SCR1); + __ b(done); + + __ bind(runtime); + __ pushad(); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + __ popad(); + __ bind(done); + + } + break; +#endif + + case predicate_failed_trap_id: + { + StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + __ jmp(deopt_blob->unpack_with_reexecution(), relocInfo::runtime_call_type); + } + break; + + case dtrace_object_alloc_id: + { + // A0: object + StubFrame f(sasm, "dtrace_object_alloc", dont_gc_arguments); + save_live_registers(sasm); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), A0); + + restore_live_registers(sasm); + } + break; + + default: + { + StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); + __ li(A0, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), A0); + __ should_not_reach_here(); + } + break; + } + } + return oop_maps; +} + +#undef __ + +const char *Runtime1::pd_name_for_address(address entry) { + Unimplemented(); + return 0; +} diff --git a/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp new file mode 100644 index 00000000000..df052a058c8 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c1_globals_loongarch.hpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the client compiler. +// (see c1_globals.hpp) + +#ifndef COMPILER2 +define_pd_global(bool, BackgroundCompilation, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); +define_pd_global(bool, UseOnStackReplacement, true ); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); +define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); +define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(bool, ProfileInterpreter, false); +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(bool, NeverActAsServerClassMachine, true ); +define_pd_global(uint64_t,MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // !COMPILER2 +define_pd_global(bool, UseTypeProfile, false); +define_pd_global(bool, RoundFPResults, true ); + +define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, false); +define_pd_global(bool, TwoOperandLIRForm, false ); + +define_pd_global(intx, SafepointPollOffset, 0 ); + +#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp new file mode 100644 index 00000000000..044b0d2536d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c2_globals_loongarch.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. +define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ResizeTLAB, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +#ifdef CC_INTERP +define_pd_global(bool, ProfileInterpreter, false); +#else +define_pd_global(bool, ProfileInterpreter, true); +#endif // CC_INTERP +define_pd_global(bool, TieredCompilation, true); +define_pd_global(intx, CompileThreshold, 10000); +define_pd_global(intx, BackEdgeThreshold, 100000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 3); +define_pd_global(intx, FLOATPRESSURE, 6); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +define_pd_global(intx, INTPRESSURE, 13); +define_pd_global(intx, InteriorEntryAlignment, 16); +define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, LoopUnrollLimit, 60); +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 128ULL*G); +define_pd_global(intx, RegisterCostAreaRatio, 16000); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoScheduling, false); +define_pd_global(bool, OptoBundling, false); + +define_pd_global(intx, ReservedCodeCacheSize, 48*M); +define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); + +// Heap related flags +define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp new file mode 100644 index 00000000000..c7bf590b60d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/c2_init_loongarch.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" + +// processor dependent initialization for LoongArch + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); +} diff --git a/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp new file mode 100644 index 00000000000..652f6c10926 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/codeBuffer_loongarch.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp new file mode 100644 index 00000000000..70a47fc7722 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/compiledIC_loongarch.cpp @@ -0,0 +1,167 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// Release the CompiledICHolder* associated with this call site is there is one. +void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + if (is_icholder_entry(call->destination())) { + NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value()); + InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data()); + } +} + +bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + return is_icholder_entry(call->destination()); +} + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { + address mark = cbuf.insts_mark(); // get mark within main instrs section + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); + if (base == NULL) return NULL; // CodeBuffer::expand failed + + // static stub relocation stores the instruction address of the call + __ relocate(static_stub_Relocation::spec(mark), 0); + + // Code stream for loading method may be changed. + __ ibar(0); + + // Rmethod contains methodOop, it should be relocated for GC + // static stub relocation also tags the methodOop in the code-stream. + __ mov_metadata(Rmethod, NULL); + // This is recognized as unresolved by relocs/nativeInst/ic code + + cbuf.set_insts_mark(); + __ patchable_jump(__ pc()); + // Update current stubs pointer and restore code_end. + __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { + return NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeGeneralJump::instruction_size; +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 16; +} + +void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. 
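For orientation, the stub parsed next is the one emitted by emit_to_interp_stub above (a barrier/nop slot, then the Rmethod constant load, then the patchable jump), which is also why to_interp_stub_size() sums three instruction sizes. A standalone sketch of how those offsets line up, with placeholder sizes rather than the real NativeInstruction constants:

// Sketch only: mirrors how set_to_interpreted()/set_stub_to_clean() locate the
// pieces of the to-interpreter stub. The fields stand in for
// NativeInstruction::nop_instruction_size, NativeMovConstReg::instruction_size
// and NativeGeneralJump::instruction_size; the values used are illustrative.
struct ToInterpStubLayoutSketch {
  int nop_size;        // leading barrier/nop slot
  int mov_const_size;  // mov_metadata(Rmethod, ...) constant load
  int jump_size;       // trailing patchable jump

  int method_holder_offset() const { return nop_size; }                  // metadata load starts here
  int jump_offset() const          { return nop_size + mov_const_size; } // jump starts here
  int total_size() const           { return nop_size + mov_const_size + jump_size; }
};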
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef LOONGARCH64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + + assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), + "a) MT-unsafe modification of inline cache"); + assert(jump->jump_destination() == jump->instruction_address() || jump->jump_destination() == entry, + "b) MT-unsafe modification of inline cache"); + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef LOONGARCH64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + method_holder->set_data(0); + jump->set_jump_destination(jump->instruction_address()); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledStaticCall::verify() { + // Verify call. + NativeCall::verify(); + if (os::is_MP()) { + verify_alignment(); + } + + // Verify stub. + address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef LOONGARCH64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT diff --git a/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp new file mode 100644 index 00000000000..cb655401395 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/copy_loongarch.hpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP
+#define CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP
+
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+#ifdef TARGET_OS_ARCH_linux_loongarch
+# include "copy_linux_loongarch.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_solaris_loongarch
+# include "copy_solaris_loongarch.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_windows_loongarch
+# include "copy_windows_loongarch.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_bsd_loongarch
+# include "copy_bsd_loongarch.inline.hpp"
+#endif
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+
+// Template for atomic, element-wise copy.
+template <class T>
+static void copy_conjoint_atomic(const T* from, T* to, size_t count) {
+  if (from > to) {
+    while (count-- > 0) {
+      // Copy forwards
+      *to++ = *from++;
+    }
+  } else {
+    from += count - 1;
+    to += count - 1;
+    while (count-- > 0) {
+      // Copy backwards
+      *to-- = *from--;
+    }
+  }
+}
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  julong* to = (julong*) tohw;
+  julong v = ((julong) value << 32) | value;
+  while (count-- > 0) {
+    *to++ = v;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);
+}
+
+#endif //CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP
diff --git a/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp
new file mode 100644
index 00000000000..45d86f5bfed
--- /dev/null
+++ b/hotspot/src/cpu/loongarch/vm/cppInterpreterGenerator_loongarch.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +#ifndef CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP + + protected: + +#if 0 + address generate_asm_interpreter_entry(bool synchronized); + address generate_native_entry(bool synchronized); + address generate_abstract_entry(void); + address generate_math_entry(AbstractInterpreter::MethodKind kind); + address generate_empty_entry(void); + address generate_accessor_entry(void); + void lock_method(void); + void generate_stack_overflow_check(void); + + void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); + void generate_counter_overflow(Label* do_continue); +#endif + + void generate_more_monitors(); + void generate_deopt_handling(); + address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only + void generate_compute_interpreter_state(const Register state, + const Register prev_state, + const Register sender_sp, + bool native); // C++ interpreter only + +#endif // CPU_LOONGARCH_VM_CPPINTERPRETERGENERATOR_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp new file mode 100644 index 00000000000..d6c0df3b77e --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/cppInterpreter_loongarch.cpp @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/cppInterpreter.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef SHARK +#include "shark/shark_globals.hpp" +#endif + +#ifdef CC_INTERP + +// Routine exists to make tracebacks look decent in debugger +// while "shadow" interpreter frames are on stack. It is also +// used to distinguish interpreter frames. + +extern "C" void RecursiveInterpreterActivation(interpreterState istate) { + ShouldNotReachHere(); +} + +bool CppInterpreter::contains(address pc) { + Unimplemented(); +} + +#define STATE(field_name) Lstate, in_bytes(byte_offset_of(BytecodeInterpreter, field_name)) +#define __ _masm-> + +Label frame_manager_entry; +Label fast_accessor_slow_entry_path; // fast accessor methods need to be able to jmp to unsynchronized + // c++ interpreter entry point this holds that entry point label. + +static address unctrap_frame_manager_entry = NULL; + +static address interpreter_return_address = NULL; +static address deopt_frame_manager_return_atos = NULL; +static address deopt_frame_manager_return_btos = NULL; +static address deopt_frame_manager_return_itos = NULL; +static address deopt_frame_manager_return_ltos = NULL; +static address deopt_frame_manager_return_ftos = NULL; +static address deopt_frame_manager_return_dtos = NULL; +static address deopt_frame_manager_return_vtos = NULL; + +const Register prevState = G1_scratch; + +void InterpreterGenerator::save_native_result(void) { + Unimplemented(); +} + +void InterpreterGenerator::restore_native_result(void) { + Unimplemented(); +} + +// A result handler converts/unboxes a native call result into +// a java interpreter/compiler result. The current frame is an +// interpreter frame. The activation frame unwind code must be +// consistent with that of TemplateTable::_return(...). In the +// case of native methods, the caller's SP was not modified. 
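The comment above describes what a result handler does in general terms; as a rough illustration of the "unbox/normalize" step it mentions (a generic sketch only, not code from this port, which leaves all of these CC_INTERP entries Unimplemented):

#include <cstdint>

// Sketch only: a native method returns its result in a raw register; for a
// boolean result the handler canonicalizes "any non-zero value" to 1 before
// the interpreter or compiler consumes it.
typedef uint8_t jboolean_sketch;

static jboolean_sketch normalize_boolean_result(intptr_t raw_native_result) {
  return raw_native_result != 0 ? 1 : 0;
}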
+address CppInterpreterGenerator::generate_result_handler_for(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreter::return_entry(TosState state, int length) { + Unimplemented(); +} + +address CppInterpreter::deopt_entry(TosState state, int length) { + Unimplemented(); +} + +void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { + Unimplemented(); +} + +address InterpreterGenerator::generate_empty_entry(void) { + Unimplemented(); +} + +address InterpreterGenerator::generate_accessor_entry(void) { + Unimplemented(); +} + +address InterpreterGenerator::generate_native_entry(bool synchronized) { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_compute_interpreter_state(const Register state, + const Register prev_state, + bool native) { + Unimplemented(); +} + +void InterpreterGenerator::lock_method(void) { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_deopt_handling() { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_more_monitors() { + Unimplemented(); +} + + +static address interpreter_frame_manager = NULL; + +void CppInterpreterGenerator::adjust_callers_stack(Register args) { + Unimplemented(); +} + +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + Unimplemented(); +} + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : CppInterpreterGenerator(code) { + Unimplemented(); +} + + +static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { + Unimplemented(); +} + +int AbstractInterpreter::size_top_interpreter_activation(methodOop method) { + Unimplemented(); +} + +void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill, + frame* caller, + frame* current, + methodOop method, + intptr_t* locals, + intptr_t* stack, + intptr_t* stack_base, + intptr_t* monitor_base, + intptr_t* frame_bottom, + bool is_top_frame + ) +{ + Unimplemented(); +} + +void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp) { + Unimplemented(); +} + + +int AbstractInterpreter::layout_activation(methodOop method, + int tempcount, // Number of slots on java expression stack in use + int popframe_extra_args, + int moncount, // Number of active monitors + int callee_param_size, + int callee_locals_size, + frame* caller, + frame* interpreter_frame, + bool is_top_frame) { + Unimplemented(); +} + +#endif // CC_INTERP diff --git a/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp new file mode 100644 index 00000000000..50de03653b1 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/debug_loongarch.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nmethod.hpp" +#include "runtime/frame.hpp" +#include "runtime/init.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" +#include "utilities/top.hpp" + +#ifndef PRODUCT + +void pd_ps(frame f) { + intptr_t* sp = f.sp(); + intptr_t* prev_sp = sp - 1; + intptr_t *pc = NULL; + intptr_t *next_pc = NULL; + int count = 0; + tty->print("register window backtrace from %#lx:\n", p2i(sp)); +} + +// This function is used to add platform specific info +// to the error reporting code. + +void pd_obfuscate_location(char *buf,int buflen) {} + +#endif // PRODUCT diff --git a/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp new file mode 100644 index 00000000000..62478be3dc8 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/disassembler.hpp" +#include "depChecker_loongarch.hpp" + +// Nothing to do on LoongArch diff --git a/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp new file mode 100644 index 00000000000..598be0ee6f4 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/depChecker_loongarch.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP + +// Nothing to do on LoongArch + +#endif // CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp new file mode 100644 index 00000000000..ccd89e8d6d2 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/disassembler_loongarch.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return "gpr-names=64"; + } + +#endif // CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp new file mode 100644 index 00000000000..0f50a5715de --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.cpp @@ -0,0 +1,711 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_loongarch.inline.hpp" + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support +// for Profiling - acting on another frame. walks sender frames +// if valid. +// frame profile_find_Java_sender_frame(JavaThread *thread); + +bool frame::safe_for_sender(JavaThread *thread) { + address sp = (address)_sp; + address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) + bool sp_safe = (sp < thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size); + + + if (!sp_safe) { + return false; + } + + // unextended sp must be within the stack and above or equal sp + bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && + (unextended_sp >= sp); + + if (!unextended_sp_safe) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL ) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. 
+ + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. + return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address) this->fp()[return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[link_offset]; + + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; + } + + sender_sp = _unextended_sp + _cb->frame_size(); + sender_unextended_sp = sender_sp; + // On LA the return_address is always the word on the stack + sender_pc = (address) *(sender_sp-1); + // Note: frame::sender_sp_offset is only valid for compiled frame + saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // FP is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP + // is really a frame pointer. 
+ + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); + + return jcw_safe; + } + + if (sender_blob->is_nmethod()) { + nmethod* nm = sender_blob->as_nmethod_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { + return false; + } + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_nmethod(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + + if (!sender_blob->is_nmethod()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + // Note: fp == NULL is not really a prerequisite for this to be safe to + // walk for c2. However we've modified the code such that if we get + // a failure with fp != NULL that we then try with FP == NULL. + // This is basically to mimic what a last_frame would look like if + // c2 had generated it. + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + + if ( (address) this->fp()[return_addr_offset] == NULL) return false; + + + // could try and do some more potential verification of native frame if we could think of some... 
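The pointer-sanity tests used throughout safe_for_sender() above (sp_safe, unextended_sp_safe, the saved_fp_safe checks) all share one shape; a minimal standalone sketch with stand-in types and illustrative names:

#include <cstddef>

typedef unsigned char* address_t;  // stand-in for HotSpot's address

// Sketch only: stacks grow down, so a pointer is inside the usable stack when
// it lies below stack_base (the high end) and above the guard pages.
static bool within_usable_stack(address_t p, address_t stack_base, size_t usable_size) {
  return p < stack_base && p >= stack_base - usable_size;
}

// Sketch only: a saved frame pointer is plausible when it is on the stack and
// strictly above the sender's sp, mirroring the saved_fp_safe tests above.
static bool plausible_saved_fp(address_t saved_fp, address_t stack_base, address_t sender_sp) {
  return saved_fp < stack_base && saved_fp > sender_sp;
}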
+ + return true; + +} + +void frame::patch_pc(Thread* thread, address pc) { + assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + + // Either the return address is the original one or we are going to + // patch in the same address that's already there. + assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + _cb = CodeCache::find_blob(pc); + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +#ifdef CC_INTERP +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + // QQQ why does this specialize method exist if frame::sender_sp() does same thing? + // seems odd and if we always know interpreted vs. non then sender_sp() is really + // doing too much work. + return get_interpreterState()->sender_sp(); +} + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return get_interpreterState()->monitor_base(); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + return (BasicObjectLock*) get_interpreterState()->stack_base(); +} + +#else // CC_INTERP + +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the pointer points inside the frame + assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); + assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; +} +#endif // CC_INTERP + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from 
C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + if (jfa->last_Java_pc() != NULL ) { + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; + } + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); + return fr; +} + +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // sp is the raw sp from the sender after adapter or interpreter extension + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). + intptr_t* unextended_sp = interpreter_frame_sender_sp(); + + // The interpreter and compiler(s) always save FP in a known + // location on entry. We must record where that location is + // so this if FP was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves FP if we record where it is then + // we don't have to always save FP on entry and exit to c2 compiled + // code, on entry will be enough. +#ifdef COMPILER2 + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } +#endif /* COMPILER2 */ + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + + +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. The unextended SP might also be the saved SP +// for MethodHandle call sites. +#ifdef ASSERT +void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. + fr._unextended_sp = unextended_sp; + + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); + assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); +} +#endif + + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { + // On LoongArch, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); + if (sender_nm != NULL) { + // If the sender PC is a deoptimization point, get the original PC. + if (sender_nm->is_deopt_entry(_pc) || + sender_nm->is_deopt_mh_entry(_pc)) { + DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp)); + } + } +} + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // The interpreter and compiler(s) always save fp in a known + // location on entry. We must record where that location is + // so that if fp was live on callout from c2 we can find + // the saved copy no matter what it called. 
+ + // Since the interpreter always saves fp if we record where it is then + // we don't have to always save fp on entry and exit to c2 compiled + // code, on entry will be enough. + map->set_location(FP->as_VMReg(), (address) link_addr); + // this is weird "H" ought to be at a higher address however the + // oopMaps seems to have the "H" regs at the same address and the + // vanilla register. + // XXXX make this go away + if (true) { + map->set_location(FP->as_VMReg()->next(), (address) link_addr); + } +} + +//------------------------------sender_for_compiled_frame----------------------- +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + + // frame owned by optimizing compiler + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + + intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = sender_sp; + +#ifdef ASSERT + const bool c1_compiled = _cb->is_compiled_by_c1(); + bool native = _cb->is_nmethod() && ((nmethod*)_cb)->is_native_method(); + if (c1_compiled && native) { + assert(sender_sp == fp() + frame::sender_sp_offset, "incorrect frame size"); + } +#endif // ASSERT + // On Intel the return_address is always the word on the stack + // the fp in compiler points to sender fp, but in interpreter, fp points to return address, + // so getting sender for compiled frame is not same as interpreter frame. + // we hard code here temporarily + // spark + address sender_pc = (address) *(sender_sp-1); + + intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of epb there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + assert(sender_sp != sp(), "must have changed"); + return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + if (is_interpreted_frame()) return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. 
+ return frame(sender_sp(), link(), sender_pc()); +} + + +bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { + assert(is_interpreted_frame(), "must be interpreter frame"); + Method* method = interpreter_frame_method(); + // When unpacking an optimized frame the frame pointer is + // adjusted with: + int diff = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + printf("^^^^^^^^^^^^^^^adjust fp in deopt fp = 0%lx \n", (intptr_t)(fp - diff)); + return _fp == (fp - diff); +} + +void frame::pd_gc_epilog() { + // nothing done here now +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { +// QQQ +#ifdef CC_INTERP +#else + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + + // first the method + + Method* m = *interpreter_frame_method_addr(); + + // validate the method we'd find in this potential sender + if (!m->is_valid_method()) return false; + + // stack frames shouldn't be much larger than max_stack elements + + //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { + if (fp() - sp() > 4096) { // stack frames shouldn't be large. + return false; + } + + // validate bci/bcx + + intptr_t bcx = interpreter_frame_bcx(); + if (m->validate_bci_from_bcx(bcx) < 0) { + return false; + } + + // validate ConstantPoolCache* + + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + + if (cp == NULL || !cp->is_metaspace_object()) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + + if (locals > thread->stack_base() || locals < (address) fp()) return false; + + // We'd have to be pretty unlucky to be mislead at this point + +#endif // CC_INTERP + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { +#ifdef CC_INTERP + // Needed for JVMTI. The result should always be in the interpreterState object + assert(false, "NYI"); + interpreterState istate = get_interpreterState(); +#endif // CC_INTERP + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr; + if (method->is_native()) { + // Prior to calling into the runtime to report the method_exit the possible + // return value is pushed to the native stack. If the result is a jfloat/jdouble + // then ST0 is saved. See the note in generate_native_result + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + tos_addr += 2; + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { +#ifdef CC_INTERP + obj = istate->_oop_temp; +#else + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); +#endif // CC_INTERP + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; + } + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdx); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcx); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} +#endif + +intptr_t *frame::initial_deoptimization_info() { + // used to reset the saved FP + return fp(); +} + +intptr_t* frame::real_fp() const { + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(! is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} +#endif diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp new file mode 100644 index 00000000000..964026e6219 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.hpp @@ -0,0 +1,229 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP + +#include "runtime/synchronizer.hpp" +#include "utilities/top.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, fp, sp} +// ------------------------------ Asm interpreter ---------------------------------------- +// Layout of asm interpreter frame: +// [expression stack ] * <- sp +// [monitors ] \ +// ... | monitor block size +// [monitors ] / +// [monitor block size ] +// [byte code index/pointr] = bcx() bcx_offset +// [pointer to locals ] = locals() locals_offset +// [constant pool cache ] = cache() cache_offset +// [methodData ] = mdp() mdx_offset +// [methodOop ] = method() method_offset +// [last sp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset +// [old frame pointer ] <- fp = link() +// [return pc ] +// [oop temp ] (only for native calls) +// [locals and parameters ] +// <- sender sp +// ------------------------------ Asm interpreter ---------------------------------------- + +// ------------------------------ C++ interpreter ---------------------------------------- +// +// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) +// +// <- SP (current sp) +// [local variables ] BytecodeInterpreter::run local variables +// ... BytecodeInterpreter::run local variables +// [local variables ] BytecodeInterpreter::run local variables +// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] +// [return pc ] (return to frame manager) +// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- +// [expression stack ] <- last_Java_sp | +// [... ] * <- interpreter_state.stack | +// [expression stack ] * <- interpreter_state.stack_base | +// [monitors ] \ | +// ... | monitor block size | +// [monitors ] / <- interpreter_state.monitor_base | +// [struct interpretState ] <-----------------------------------------| +// [return pc ] (return to callee of frame manager [1] +// [locals and parameters ] +// <- sender sp + +// [1] When the c++ interpreter calls a new method it returns to the frame +// manager which allocates a new frame on the stack. In that case there +// is no real callee of this newly allocated frame. The frame manager is +// aware of the additional frame(s) and will pop them as nested calls +// complete. Howevers tTo make it look good in the debugger the frame +// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation +// with a fake interpreter_state* parameter to make it easy to debug +// nested calls. + +// Note that contrary to the layout for the assembly interpreter the +// expression stack allocated for the C++ interpreter is full sized. +// However this is not as bad as it seems as the interpreter frame_manager +// will truncate the unused space on succesive method calls. +// +// ------------------------------ C++ interpreter ---------------------------------------- + +// Layout of interpreter frame: +// +// [ monitor entry ] <--- sp +// ... 
+// [ monitor entry ] +// -9 [ monitor block top ] ( the top monitor entry ) +// -8 [ byte code pointer ] (if native, bcp = 0) +// -7 [ constant pool cache ] +// -6 [ methodData ] mdx_offset(not core only) +// -5 [ mirror ] +// -4 [ methodOop ] +// -3 [ locals offset ] +// -2 [ last_sp ] +// -1 [ sender's sp ] +// 0 [ sender's fp ] <--- fp +// 1 [ return address ] +// 2 [ oop temp offset ] (only for native calls) +// 3 [ result handler offset ] (only for native calls) +// 4 [ result type info ] (only for native calls) +// [ local var m-1 ] +// ... +// [ local var 0 ] +// [ argumnet word n-1 ] <--- ( sender's sp ) +// ... +// [ argument word 0 ] <--- S7 + + public: + enum { + pc_return_offset = 0, + // All frames + link_offset = 0, + return_addr_offset = 1, + // non-interpreter frames + sender_sp_offset = 2, + +#ifndef CC_INTERP + + // Interpreter frames + interpreter_frame_return_addr_offset = 1, + interpreter_frame_result_handler_offset = 3, // for native calls only + interpreter_frame_oop_temp_offset = 2, // for native calls only + + interpreter_frame_sender_fp_offset = 0, + interpreter_frame_sender_sp_offset = -1, + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_mdx_offset = interpreter_frame_method_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mdx_offset - 1, + interpreter_frame_bcx_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcx_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + +#endif // CC_INTERP + + // Entry frames + entry_frame_call_wrapper_offset = -9, + + // Native frames + + native_frame_initial_param_offset = 2 + + }; + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and _pc: + intptr_t* _fp; // frame pointer + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. 
+ + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); + static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { + verify_deopt_original_pc(nm, unextended_sp, true); + } +#endif + + public: + // Constructors + + frame(intptr_t* sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* fp); + + void init(intptr_t* sp, intptr_t* fp, address pc); + + // accessors for the instance variables + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + + // return address of param, zero origin index. + inline address* native_param_addr(int idx) const; + + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + + // helper to update a map with callee-saved FP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + +#ifndef CC_INTERP + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* sp); +#endif // CC_INTERP + +#ifdef CC_INTERP + inline interpreterState get_interpreterState() const; +#endif // CC_INTERP + +#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp new file mode 100644 index 00000000000..3d22339ad7d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/frame_loongarch.inline.hpp @@ -0,0 +1,312 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP + +#include "code/codeCache.hpp" + +// Inline functions for Loongson frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = (address)(sp[-1]); + + // Here's a sticky one. This constructor can be called via AsyncGetCallTrace + // when last_Java_sp is non-null but the pc fetched is junk. If we are truly + // unlucky the junk value could be to a zombied method and we'll die on the + // find_blob call. This is also why we can have no asserts on the validity + // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler + // -> pd_last_frame should use a specialized version of pd_last_frame which could + // call a specilaized frame constructor instead of this one. + // Then we could use the assert below. However this assert is of somewhat dubious + // value. + // assert(_pc != NULL, "no pc?"); + + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && unextended_sp() == other.unextended_sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
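+// For example (assuming the usual downward-growing stack): the id is the frame's
+// unextended sp, so if frame A calls frame B then B.id() < A.id(); hence
+// B.is_younger(A.id()) and A.is_older(B.id()) both hold.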
+inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Relationals on frames based +// Return true if the frame is younger (more recent activation) than the frame represented by id +inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() < id ; } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + + + +inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } +inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: + +inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } +inline address frame::sender_pc() const { return *sender_pc_addr(); } + +// return address of param, zero origin index. +inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); } + +#ifdef CC_INTERP + +inline interpreterState frame::get_interpreterState() const { + return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize )); +} + +inline intptr_t* frame::sender_sp() const { + // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames? + if (is_interpreted_frame()) { + assert(false, "should never happen"); + return get_interpreterState()->sender_sp(); + } else { + return addr_at(sender_sp_offset); + } +} + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_locals); +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return (intptr_t*) &(get_interpreterState()->_bcp); +} + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_constants); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_method); +} + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return (intptr_t*) &(get_interpreterState()->_mdx); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + assert(is_interpreted_frame(), "wrong frame type"); + return get_interpreterState()->_stack + 1; +} + +#else // asm interpreter +inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + return (intptr_t*)addr_at(interpreter_frame_bcx_offset); +} + + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + return (intptr_t*)addr_at(interpreter_frame_mdx_offset); +} + + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const 
{ + return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(interpreter_frame_method_offset); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL ) { + return sp(); + } else { + // sp() may have been extended by an adapter + assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +#endif // CC_INTERP + +inline int frame::pd_oop_map_offset_adjustment() const { + return 0; +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + + +inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } + + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + +// Compiled frames + +inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { + return (nof_args - local_index + (local_index < nof_args ? 1: -1)); +} + +inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { + return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors); +} + +inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) { + return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1); +} + +inline bool frame::volatile_across_calls(Register reg) { + return true; +} + + + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*) map->location(V0->as_VMReg())); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*) map->location(V0->as_VMReg())) = obj; +} + +#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp new file mode 100644 index 00000000000..f9f93b9e657 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/globalDefinitions_loongarch.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP +// Size of LoongArch Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = (2*wordSize); + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are properly extended to 64 bits. +// If set, SharedRuntime::c_calling_convention() must adapt +// signatures accordingly. +const bool CCallingConventionRequiresIntsAsLongs = false; + +#define SUPPORTS_NATIVE_CX8 + +#endif // CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp new file mode 100644 index 00000000000..182be608a30 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/globals_loongarch.hpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +#ifdef CORE +define_pd_global(bool, UseSSE, 0); +#endif /* CORE */ +define_pd_global(bool, ConvertSleepToYield, true); +define_pd_global(bool, ShareVtableStubs, true); +define_pd_global(bool, CountInterpCalls, true); + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. 
+define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast +define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + +define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 2000); + +define_pd_global(uintx, TLABSize, 0); +define_pd_global(uintx, NewSize, 1024 * K); +define_pd_global(intx, PreInflateSpin, 10); + +define_pd_global(intx, PrefetchFieldsAhead, -1); + +define_pd_global(intx, StackYellowPages, 2); +define_pd_global(intx, StackRedPages, 1); +define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); +define_pd_global(bool, UseMembar, true); +// GC Ergo Flags +define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, PreserveFramePointer, false); +// Only c2 cares about this at the moment +define_pd_global(intx, AllocatePrefetchStyle, 2); +define_pd_global(intx, AllocatePrefetchDistance, -1); + +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ + \ + product(bool, UseCodeCacheAllocOpt, true, \ + "Allocate code cache within 32-bit memory address space") \ + \ + product(bool, UseLSX, false, \ + "Use LSX 128-bit vector instructions") \ + \ + product(bool, UseLASX, false, \ + "Use LASX 256-bit vector instructions") \ + \ + product(intx, UseSyncLevel, 10000, \ + "The sync level on Loongson CPUs" \ + "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ + "UseSyncLevel == 4000, 101, maybe for GS464V" \ + "UseSyncLevel == 3000, 001, maybe for GS464V" \ + "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ + "UseSyncLevel == 1000, 110, maybe for GS464") \ + \ + product(bool, UseUnalignedAccesses, false, \ + "Use unaligned memory accesses in Unsafe") \ + \ + product(bool, UseCRC32, false, \ + "Use CRC32 instructions for CRC32 computation") \ + \ + product(bool, UseActiveCoresMP, false, \ + "Eliminate barriers for single active cpu") + +#endif // CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp new file mode 100644 index 00000000000..8c782253462 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/icBuffer_loongarch.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/oop.inline.hpp" +#include "oops/oop.inline2.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +int InlineCacheBuffer::ic_stub_code_size() { + return NativeMovConstReg::instruction_size + + NativeGeneralJump::instruction_size + + 1; + // so that code_end can be set in CodeBuffer + // 64bit 15 = 6 + 8 bytes + 1 byte + // 32bit 7 = 2 + 4 bytes + 1 byte +} + + +// we use T1 as cached oop(klass) now. this is the target of virtual call, +// when reach here, the receiver in T0 +// refer to shareRuntime_loongarch.cpp,gen_i2c2i_adapters +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, + address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // note: even though the code contains an embedded oop, we do not need reloc info + // because + // (1) the oop is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + // assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); +#define __ masm-> + __ patchable_li52(T1, (long)cached_value); + // TODO: confirm reloc + __ jmp(entry_point, relocInfo::runtime_call_type); + __ flush(); +#undef __ +} + + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + return jump->jump_destination(); +} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // creation also verifies the object + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); + // Verifies the jump + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + void* o= (void*)move->data(); + return o; +} diff --git a/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp new file mode 100644 index 00000000000..d577e41f59c --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/icache_loongarch.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) +{ +#define __ _masm-> + StubCodeMark mark(this, "ICache", "flush_icache_stub"); + address start = __ pc(); + + __ ibar(0); + __ ori(V0, RA2, 0); + __ jr(RA); + + *flush_icache_stub = (ICache::flush_icache_stub_t)start; +#undef __ +} diff --git a/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp new file mode 100644 index 00000000000..15e45cb3508 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/icache_loongarch.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. + +class ICache : public AbstractICache { + public: + enum { + stub_size = 3 * BytesPerInstWord, // Size of the icache flush stub in bytes + line_size = 32, // flush instruction affects a dword + log2_line_size = 5 // log2(line_size) + }; +}; + +#endif // CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp new file mode 100644 index 00000000000..8c84f21511b --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.cpp @@ -0,0 +1,1960 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interp_masm_loongarch_64.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Implementation of InterpreterMacroAssembler + +#ifdef CC_INTERP +void InterpreterMacroAssembler::get_method(Register reg) { +} +#endif // CC_INTERP + +void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { + if (UseUnalignedAccesses) { + ld_hu(reg, BCP, offset); + } else { + ld_bu(reg, BCP, offset); + ld_bu(tmp, BCP, offset + 1); + bstrins_d(reg, tmp, 15, 8); + } +} + +void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, int offset) { + if (UseUnalignedAccesses) { + ld_wu(reg, BCP, offset); + } else { + ldr_w(reg, BCP, offset); + ldl_w(reg, BCP, offset + 3); + lu32i_d(reg, 0); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) { + // interpreter specific + // + // Note: No need to save/restore bcp & locals pointer + // since these are callee saved registers and no blocking/ + // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has + // already saved them so that it can use BCP/LVP as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). Which caused bizarre failures + // when jvm built with ASSERTs. 
+#ifdef ASSERT + save_bcp(); + { + Label L; + ld_d(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT,R0,L); + stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); + bind(L); + } +#endif + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); + // interpreter specific + // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save them here (see note above) the assert is invalid. +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway. Btw., bcp must be saved/restored since it may change + // due to GC. + assert(java_thread == noreg , "not expecting a precomputed java thread"); + save_bcp(); +#ifdef ASSERT + { + Label L; + ld_d(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT, R0, L); + stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); + // interpreter specific + restore_bcp(); + restore_locals(); +} + + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. If the flag has the popframe_processing bit set, it + // means that this code is called *during* popframe handling - we + // don't want to reenter. + // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. 
+ // Not clear if any other register is available, so load AT twice + assert(AT != java_thread, "check"); + ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_pending_bit); + beq(AT, R0, L); + + ld_w(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_processing_bit); + bne(AT, R0, L); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + jr(V0); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + Register thread = T8; +#ifndef OPT_THREAD + get_thread(thread); +#else + move(T8, TREG); +#endif + ld_ptr(thread, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address tos_addr (thread, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + const Address oop_addr (thread, in_bytes(JvmtiThreadState::earlyret_oop_offset())); + const Address val_addr (thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + //V0, oop_addr,V1,val_addr + switch (state) { + case atos: + ld_ptr(V0, oop_addr); + st_ptr(R0, oop_addr); + verify_oop(V0, state); + break; + case ltos: + ld_ptr(V0, val_addr); // fall through + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + ld_w(V0, val_addr); + break; + case ftos: + fld_s(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case dtos: + fld_d(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + li(AT, (int)ilgl); + st_w(AT, tos_addr); + st_w(R0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + Register tmp = T4; + + assert(java_thread != AT, "check"); + assert(java_thread != tmp, "check"); + ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + beq(AT, R0, L); + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + ld_w(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); + li(tmp, JvmtiThreadState::earlyret_pending); + bne(tmp, AT, L); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. 
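+    // The chosen entry depends on the pending TosState (earlyret_tos): it is read
+    // from the JvmtiThreadState, passed in A0, and the leaf call returns the matching
+    // entry address in V0, which is then jumped to directly.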
+ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + ld_w(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + move(A0, AT); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); + jr(V0); + bind(L); + } +} + + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, + int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + ld_bu(AT, BCP, bcp_offset); + ld_bu(reg, BCP, bcp_offset + 1); + bstrins_w(reg, AT, 15, 8); +} + + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + get_2_byte_integer_at_bcp(index, AT, bcp_offset); + } else if (index_size == sizeof(u4)) { + assert(EnableInvokeDynamic, "giant index used only for JSR 292"); + get_4_byte_integer_at_bcp(index, bcp_offset); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + nor(index, index, R0); + slli_w(index, index, 0); + } else if (index_size == sizeof(u1)) { + ld_bu(index, BCP, bcp_offset); + } else { + ShouldNotReachHere(); + } +} + + +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, index); + get_cache_index_at_bcp(index, bcp_offset, index_size); + ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); + shl(index, 2); +} + + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. 
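+  // Sketch of the addressing below (assuming alsl_d(rd, rj, rk, sa) computes
+  // rd = (rj << (sa + 1)) + rk): index was already scaled to word units by
+  // get_cache_and_index_at_bcp(), so
+  //   AT = cache + index * wordSize   // start of the ConstantPoolCacheEntry
+  // and the 32-bit load then reads the indices field out of that entry.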
+ alsl_d(AT, index, cache, Address::times_ptr - 1); + ld_w(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); + if(os::is_MP()) { + membar(Assembler::Membar_mask_bits(LoadLoad|LoadStore)); + } + + const int shift_count = (1 + byte_no) * BitsPerByte; + assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || + (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), + "correct shift count"); + srli_d(bytecode, bytecode, shift_count); + assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); + li(AT, ConstantPoolCacheEntry::bytecode_1_mask); + andr(bytecode, bytecode, AT); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert(cache != tmp, "must use different register"); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + shl(tmp, 2 + LogBytesPerWord); + ld_d(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + // skip past the header + addi_d(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + add_d(cache, cache, tmp); +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ld_d(mcs, method, in_bytes(Method::method_counters_offset())); + bne(mcs, R0, has_counters); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ld_d(mcs, method, in_bytes(Method::method_counters_offset())); + beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory + bind(has_counters); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index) { + assert_different_registers(result, index); + // convert from field index to resolved_references() index and from + // word index to byte offset. Since this is a java object, it can be compressed + Register tmp = index; // reuse + shl(tmp, LogBytesPerHeapOop); + + get_constant_pool(result); + // load pointer for resolved_references[] objArray + ld_d(result, result, ConstantPool::resolved_references_offset_in_bytes()); + // JNIHandles::resolve(obj); + ld_d(result, result, 0); //? is needed? + // Add in the index + add_d(result, result, tmp); + load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +// Resets LVP to locals. Register sub_klass cannot be any of the above. +void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { + + assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); + assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); + assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); + // Profile the not-null value's klass. + // Here T4 and T1 are used as temporary registers. + profile_typecheck(T4, Rsub_klass, T1); // blows T4, reloads T1 + + // Do the check. 
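+  // check_klass_subtype() branches to ok_is_subtype on success; on failure it falls
+  // through here so the failed check can be profiled before the slow path is taken.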
+ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 + + // Profile the failure of the check. + profile_typecheck_failed(T4); // blows T4 + +} + + + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + ld_d(r, SP, 0); + addi_d(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + ld_w(r, SP, 0); + addi_d(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + ld_d(r, SP, 0); + addi_d(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + fld_s(r, SP, 0); + addi_d(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + fld_d(r, SP, 0); + addi_d(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + addi_d(SP, SP, - Interpreter::stackElementSize); + st_d(r, SP, 0); +} + +void InterpreterMacroAssembler::push_i(Register r) { + // For compatibility reason, don't change to sw. + addi_d(SP, SP, - Interpreter::stackElementSize); + st_d(r, SP, 0); +} + +void InterpreterMacroAssembler::push_l(Register r) { + addi_d(SP, SP, -2 * Interpreter::stackElementSize); + st_d(r, SP, 0); + st_d(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + addi_d(SP, SP, - Interpreter::stackElementSize); + fst_s(r, SP, 0); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + addi_d(SP, SP, -2 * Interpreter::stackElementSize); + fst_d(r, SP, 0); + st_d(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: pop_i(); break; + case ltos: pop_l(); break; + case ftos: pop_f(); break; + case dtos: pop_d(); break; + case vtos: /* nothing to do */ break; + default: ShouldNotReachHere(); + } + verify_oop(FSR, state); +} + +//FSR=V0,SSR=V1 +void InterpreterMacroAssembler::push(TosState state) { + verify_oop(FSR, state); + switch (state) { + case atos: push_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: push_f(); break; + case dtos: push_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ld_d(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + st_d(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { + // record last_sp + move(Rsender, SP); + st_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + if (JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. +#ifndef OPT_THREAD + get_thread(temp); +#else + move(temp, TREG); +#endif + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? 
+ ld_w(AT, temp, in_bytes(JavaThread::interp_only_mode_offset())); + beq(AT, R0, run_compiled_code); + ld_d(AT, method, in_bytes(Method::interpreter_entry_offset())); + jr(AT); + bind(run_compiled_code); + } + + ld_d(AT, method, in_bytes(Method::from_interpreted_offset())); + jr(AT); +} + + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. LoongArch64 does not do this. +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { + // Nothing LoongArch64 specific to be done here +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +// assume the next bytecode in T8. +void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop) { + if (VerifyActivationFrameSize) { + Label L; + + sub_d(T2, FP, SP); + int min_frame_size = (frame::link_offset - + frame::interpreter_frame_initial_sp_offset) * wordSize; + addi_d(T2, T2, -min_frame_size); + bge(T2, R0, L); + stop("broken stack frame"); + bind(L); + } + // FIXME: I do not know which register should pass to verify_oop + if (verifyoop) verify_oop(FSR, state); + + if((long)table >= (long)Interpreter::dispatch_table(btos) && + (long)table <= (long)Interpreter::dispatch_table(vtos)) { + int table_size = (long)Interpreter::dispatch_table(itos) - + (long)Interpreter::dispatch_table(stos); + int table_offset = ((int)state - (int)itos) * table_size; + + // S8 points to the starting address of Interpreter::dispatch_table(itos). + // See StubGenerator::generate_call_stub(address& return_address) for the initialization of S8. + if (table_offset != 0) { + if (is_simm(table_offset, 12)) { + alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); + ld_d(T3, T3, table_offset); + } else { + li(T2, table_offset); + alsl_d(T3, Rnext, S8, LogBytesPerWord - 1); + ldx_d(T3, T2, T3); + } + } else { + slli_d(T2, Rnext, LogBytesPerWord); + ldx_d(T3, S8, T2); + } + } else { + li(T3, (long)table); + slli_d(T2, Rnext, LogBytesPerWord); + ldx_d(T3, T2, T3); + } + jr(T3); +} + +void InterpreterMacroAssembler::dispatch_only(TosState state) { + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { + dispatch_base(state, Interpreter::normal_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { + dispatch_base(state, Interpreter::normal_table(state), false); +} + + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { + // load next bytecode + ld_bu(Rnext, BCP, step); + increment(BCP, step); + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + ld_bu(Rnext, BCP, 0); + dispatch_base(state, table); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from syncronized blocks. +// Remove the activation from the stack. 
+// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +// used registers : T1, T2, T3, T8 +// T1 : thread, method access flags +// T2 : monitor entry pointer +// T3 : method, monitor top +// T8 : unlock flag +void InterpreterMacroAssembler::remove_activation( + TosState state, + Register ret_addr, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { + // Note: Registers V0, V1 and F0, F1 may be in use for the result + // check if synchronized method + Label unlocked, unlock, no_unlock; + + // get the value of _do_not_unlock_if_synchronized into T8 +#ifndef OPT_THREAD + Register thread = T1; + get_thread(thread); +#else + Register thread = TREG; +#endif + ld_b(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + // reset the flag + st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + // get method access flags + ld_d(T3, FP, frame::interpreter_frame_method_offset * wordSize); + ld_w(T1, T3, in_bytes(Method::access_flags_offset())); + andi(T1, T1, JVM_ACC_SYNCHRONIZED); + beq(T1, R0, unlocked); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. + bne(T8, R0, no_unlock); + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. + addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize + - (int)sizeof(BasicObjectLock)); + // address of first monitor + ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, unlock); + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + // I think LA do not need empty_FPU_stack + // remove possible return value from FPU-stack, otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. 
+ if (install_monitor_exception) { + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + + } + + b(unlocked); + } + + bind(unlock); + unlock_object(c_rarg0); + pop(state); + + // Check that for block-structured locking (i.e., that all locked + // objects has been unlocked) + bind(unlocked); + + // V0, V1: Might contain return value + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top(FP, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + + bind(restart); + // points to current entry, starting with top-most entry + ld_d(c_rarg0, monitor_block_top); + // points to word before bottom of monitor block + addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + b(entry); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception + // Unlock does not block, so don't have to worry about the frame + // We don't have to preserve c_rarg0, since we are going to + // throw an exception + + push(state); + unlock_object(c_rarg0); + pop(state); + + if (install_monitor_exception) { + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + + b(restart); + } + + bind(loop); + ld_d(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, exception);// check if current entry is used + + addi_d(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry + bind(entry); + bne(c_rarg0, T3, loop); // check if bottom reached + } + + bind(no_unlock); + + // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation + ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + ld_d(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); + ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); +} + +#endif // C_INTERP + +// Lock object +// +// Args: +// c_rarg0: BasicObjectLock to be used for locking +// +// Kills: +// T1 +// T2 +void InterpreterMacroAssembler::lock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + } else { + Label done, slow_case; + const Register tmp_reg = T2; + const Register scr_reg = T1; + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); + + // Load object pointer into scr_reg + ld_d(scr_reg, lock_reg, obj_offset); + + if (UseBiasedLocking) { + // Note: we use noreg for the temporary register since it's hard + // to come up with a free register on all incoming code paths + biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); + } + + // Load (object->mark() | 1) into tmp_reg + ld_d(AT, scr_reg, 0); + ori(tmp_reg, AT, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + st_d(tmp_reg, lock_reg, mark_offset); + + assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); + + if (PrintBiasedLockingStatistics) { + Label succ, fail; + cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); + bind(succ); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); + b(done); + bind(fail); + } else { + cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); + } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) SP <= mark < SP + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in tmp_reg as the result of cmpxchg + sub_d(tmp_reg, tmp_reg, SP); + li(AT, 7 - os::vm_page_size()); + andr(tmp_reg, tmp_reg, AT); + // Save the test result, for recursive case, the result is zero + st_d(tmp_reg, lock_reg, mark_offset); + if (PrintBiasedLockingStatistics) { + bnez(tmp_reg, slow_case); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); + } + beqz(tmp_reg, done); + + bind(slow_case); + // Call the runtime routine for slow case + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + + bind(done); + } +} + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg0: BasicObjectLock for lock +// +// Kills: +// T1 +// T2 +// T3 +// Throw an IllegalMonitorException if object is not locked by current thread +void InterpreterMacroAssembler::unlock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Label done; + const Register tmp_reg = T1; + const Register scr_reg = T2; + const Register hdr_reg = T3; + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock structure + // Store the BasicLock address into tmp_reg + addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); + + // Load oop into scr_reg + ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + // free entry + st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + if (UseBiasedLocking) { + biased_locking_exit(scr_reg, hdr_reg, done); + } + + // Load the old header from BasicLock structure + ld_d(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); + // zero for recursive case + beqz(hdr_reg, done); + + // Atomic swap back the old header + cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); + + // Call the runtime routine for slow case. + st_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj + call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); + + bind(done); + + restore_bcp(); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld_d(mdp, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); + beq(mdp, R0, zero_continue); +} + + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + + // V0 and T0 will be used as two temporary registers. + push2(V0, T0); + + get_method(T0); + // Test MDO to avoid the call if it is NULL. + ld_d(V0, T0, in_bytes(Method::method_data_offset())); + beq(V0, R0, set_mdp); + + // method: T0 + // bcp: BCP --> S0 + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); + // mdi: V0 + // mdo is guaranteed to be non-zero here, we checked for it before the call. + get_method(T0); + ld_d(T0, T0, in_bytes(Method::method_data_offset())); + addi_d(T0, T0, in_bytes(MethodData::data_offset())); + add_d(V0, T0, V0); + bind(set_mdp); + st_d(V0, FP, frame::interpreter_frame_mdx_offset * wordSize); + pop2(T0, V0); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + Register method = T5; + Register mdp = T6; + Register tmp = A0; + push(method); + push(mdp); + push(tmp); + test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue + get_method(method); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
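+  // The fast check below recomputes the expected bcp from the mdp, roughly
+  //   expected_bcp = method->constMethod()->code_base() + header->bci()
+  // (header being the DataLayout the mdp points at), and only calls
+  // InterpreterRuntime::verify_mdp() when that does not match BCP.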
+ ld_hu(tmp, mdp, in_bytes(DataLayout::bci_offset())); + ld_d(AT, method, in_bytes(Method::const_offset())); + add_d(tmp, tmp, AT); + addi_d(tmp, tmp, in_bytes(ConstMethod::codes_offset())); + beq(tmp, BCP, verify_continue); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); + bind(verify_continue); + pop(tmp); + pop(mdp); + pop(method); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + st_d(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + // Counter address + Address data(mdp_in, constant); + + increment_mdp_data_at(data, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Address data, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + Register tmp = S0; + push(tmp); + if (decrement) { + // Decrement the register. + ld_d(AT, data); + addi_d(tmp, AT, (int32_t) -DataLayout::counter_increment); + // If the decrement causes the counter to overflow, stay negative + Label L; + blt(tmp, R0, L); + addi_d(tmp, tmp, (int32_t) DataLayout::counter_increment); + bind(L); + st_d(tmp, data); + } else { + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + ld_d(AT, data); + // Increment the register. + addi_d(tmp, AT, DataLayout::counter_increment); + // If the increment causes the counter to overflow, pull back by 1. + slt(AT, tmp, R0); + sub_d(tmp, tmp, AT); + st_d(tmp, data); + } + pop(tmp); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) { + Register tmp = S0; + push(S0); + if (decrement) { + // Decrement the register. + add_d(AT, mdp_in, reg); + assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); + ld_d(AT, AT, constant); + + addi_d(tmp, AT, (int32_t) -DataLayout::counter_increment); + // If the decrement causes the counter to overflow, stay negative + Label L; + blt(tmp, R0, L); + addi_d(tmp, tmp, (int32_t) DataLayout::counter_increment); + bind(L); + + add_d(AT, mdp_in, reg); + st_d(tmp, AT, constant); + } else { + add_d(AT, mdp_in, reg); + assert(Assembler::is_simm(constant, 12), "constant is not a simm12 !"); + ld_d(AT, AT, constant); + + // Increment the register. + addi_d(tmp, AT, DataLayout::counter_increment); + // If the increment causes the counter to overflow, pull back by 1. + slt(AT, tmp, R0); + sub_d(tmp, tmp, AT); + + add_d(AT, mdp_in, reg); + st_d(tmp, AT, constant); + } + pop(S0); +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int header_offset = in_bytes(DataLayout::header_offset()); + int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); + // Set the flag + ld_w(AT, Address(mdp_in, header_offset)); + if(Assembler::is_simm(header_bits, 12)) { + ori(AT, AT, header_bits); + } else { + push(T8); + // T8 is used as a temporary register. 
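+    // The flag mask does not fit in the 12-bit immediate of ori on this path,
+    // so it is first materialized into T8 with li() and OR'ed in register form.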
+ li(T8, header_bits); + orr(AT, AT, T8); + pop(T8); + } + st_w(AT, Address(mdp_in, header_offset)); +} + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + ld_d(AT, Address(mdp_in, offset)); + bne(AT, value, not_equal_continue); + } else { + // Put the test value into a register, so caller can use it: + ld_d(test_value_out, Address(mdp_in, offset)); + bne(value, test_value_out, not_equal_continue); + } +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); + ld_d(AT, mdp_in, offset_of_disp); + add_d(mdp_in, mdp_in, AT); + st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add_d(AT, reg, mdp_in); + assert(Assembler::is_simm(offset_of_disp, 12), "offset is not an simm12"); + ld_d(AT, AT, offset_of_disp); + add_d(mdp_in, mdp_in, AT); + st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if(Assembler::is_simm(constant, 12)) { + addi_d(mdp_in, mdp_in, constant); + } else { + li(AT, constant); + add_d(mdp_in, mdp_in, AT); + } + st_d(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + push(return_bci); // save/restore across call_VM + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + pop(return_bci); +} + + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. + // We inline increment_mdp_data_at to return bumped_count in a register + //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); + ld_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + push(T8); + // T8 is used as a temporary register. + addi_d(T8, bumped_count, DataLayout::counter_increment); + slt(AT, T8, R0); + sub_d(bumped_count, T8, AT); + pop(T8); + st_d(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. 
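+    // (The branch was not taken here, so it is BranchData::not_taken that gets
+    // bumped; the taken counter is handled in profile_taken_branch above.)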
+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + bnez(receiver, not_null); + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + b(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). +// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) +// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } + return; + } + + int last_row = VirtualCallData::row_limit() - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the receiver and for null. + // Take any of three different outcomes: + // 1. 
found receiver => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the receiver is receiver[n]. + int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); + test_mdp_data_at(mdp, recvr_offset, receiver, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the receiver from the CallData.) + + // The receiver is receiver[n]. Increment count[n]. + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); + increment_mdp_data_at(mdp, count_offset); + beq(R0, R0, done); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on receiver[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (is_virtual_call) { + beq(reg2, R0, found_null); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + beq(R0, R0, done); + bind(found_null); + } else { + bne(reg2, R0, done); + } + break; + } + // Since null is rare, make it be the branch-taken case. + beq(reg2, R0, found_null); + + // Put all the "Case 3" tests here. + record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); + + // Found a null. Keep searching for a matching receiver, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching receiver, but we + // observed the receiver[start_row] is NULL. + + // Fill in the receiver field and increment the count. 
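+  // In ReceiverTypeData terms the two set_mdp_data_at calls below amount to,
+  // roughly (illustrative, not generated code):
+  //   row[start_row].set_receiver(receiver);
+  //   row[start_row].set_count(DataLayout::counter_increment);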
+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); + set_mdp_data_at(mdp, recvr_offset, receiver); + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); + li(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + beq(R0, R0, done); + } +} + +// Example state machine code for three profile rows: +// // main copy of decision tree, rooted at row[1] +// if (row[0].rec == rec) { row[0].incr(); goto done; } +// if (row[0].rec != NULL) { +// // inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[1].rec != NULL) { +// // degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// if (row[2].rec != NULL) { goto done; } // overflow +// row[2].init(rec); goto done; +// } else { +// // remember row[1] is empty +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[1].init(rec); goto done; +// } +// } else { +// // remember row[0] is empty +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[0].init(rec); goto done; +// } +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); + + bind (done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, + Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + b(profile_continue); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. 
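+    // profile_typecheck has already advanced mdp by the VirtualCallData-sized
+    // entry, so step back by that size to reach the count cell that was bumped.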
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + li(reg2, in_bytes(MultiBranchData::per_case_size())); + mul_d(index, index, reg2); + addi_d(index, index, in_bytes(MultiBranchData::case_array_offset())); + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type + ld_d(T4, FP, frame::interpreter_frame_method_offset * wordSize); + ld_d(T4, T4, in_bytes(Method::const_offset())); + ld_bu(T4, T4, in_bytes(ConstMethod::result_type_offset())); + + Label done, notBool, notByte, notChar; + + // common case first + addi_d(AT, T4, -T_INT); + beq(AT, R0, done); + + // mask integer result to narrower return type. 
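+  // Illustrative intent of the masking below (not generated code):
+  //   T_BOOLEAN: result &= 1;        T_BYTE:            result = (jbyte)  result;
+  //   T_CHAR:    result &= 0xffff;   otherwise (T_SHORT): result = (jshort) result;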
+ addi_d(AT, T4, -T_BOOLEAN); + bne(AT, R0, notBool); + andi(result, result, 0x1); + beq(R0, R0, done); + + bind(notBool); + addi_d(AT, T4, -T_BYTE); + bne(AT, R0, notByte); + ext_w_b(result, result); + beq(R0, R0, done); + + bind(notByte); + addi_d(AT, T4, -T_CHAR); + bne(AT, R0, notChar); + bstrpick_d(result, result, 15, 0); + beq(R0, R0, done); + + bind(notChar); + ext_w_h(result, result); + + // Nothing to do for T_INT + bind(done); +} + + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { + Label update, next, none; + + verify_oop(obj); + + if (mdo_addr.index() != noreg) { + guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); + guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); + push(T0); + alsl_d(T0, mdo_addr.index(), mdo_addr.base(), mdo_addr.scale() - 1); + } + + bnez(obj, update); + + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::null_seen); + if (mdo_addr.index() == noreg) { + st_d(AT, mdo_addr); + } else { + st_d(AT, T0, mdo_addr.disp()); + } + + b(next); + + bind(update); + load_klass(obj, obj); + + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + + assert(TypeEntries::type_klass_mask == -4, "must be"); + bstrpick_d(AT, obj, 63, 2); + beqz(AT, next); + + andi(AT, obj, TypeEntries::type_unknown); + bnez(AT, next); + + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + beqz(AT, none); + + addi_d(AT, AT, -(TypeEntries::null_seen)); + beqz(AT, none); + + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + assert(TypeEntries::type_klass_mask == -4, "must be"); + bstrpick_d(AT, obj, 63, 2); + beqz(AT, next); + + // different than before. Cannot keep accurate profile. + if (mdo_addr.index() == noreg) { + ld_d(AT, mdo_addr); + } else { + ld_d(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::type_unknown); + if (mdo_addr.index() == noreg) { + st_d(AT, mdo_addr); + } else { + st_d(AT, T0, mdo_addr.disp()); + } + b(next); + + bind(none); + // first time here. Set profile type. + if (mdo_addr.index() == noreg) { + st_d(obj, mdo_addr); + } else { + st_d(obj, T0, mdo_addr.disp()); + } + + bind(next); + if (mdo_addr.index() != noreg) { + pop(T0); + } +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + ld_b(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); + li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + bne(tmp, AT, profile_continue); + + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + if (Assembler::is_simm(off_to_args, 12)) { + addi_d(mdp, mdp, off_to_args); + } else { + li(AT, off_to_args); + add_d(mdp, mdp, AT); + } + + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + if (Assembler::is_simm(-1 * i * TypeStackSlotEntries::per_arg_count(), 12)) { + addi_w(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); + } else { + li(AT, i*TypeStackSlotEntries::per_arg_count()); + sub_w(tmp, tmp, AT); + } + + li(AT, TypeStackSlotEntries::per_arg_count()); + blt(tmp, AT, done); + } + ld_d(tmp, callee, in_bytes(Method::const_offset())); + + ld_hu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); + + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + ld_d(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); + sub_d(tmp, tmp, AT); + + addi_w(tmp, tmp, -1); + + Address arg_addr = argument_address(tmp); + ld_d(tmp, arg_addr); + + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp, mdo_arg_addr); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + if (Assembler::is_simm(to_add, 12)) { + addi_d(mdp, mdp, to_add); + } else { + li(AT, to_add); + add_d(mdp, mdp, AT); + } + + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ld_d(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); + if (Assembler::is_simm(-1 * tmp_arg_counts, 12)) { + addi_w(tmp, tmp, -1 * tmp_arg_counts); + } else { + li(AT, tmp_arg_counts); + sub_w(mdp, mdp, AT); + } + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. + assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + slli_w(tmp, tmp, exact_log2(DataLayout::cell_size)); + add_d(mdp, mdp, tmp); + } + st_d(mdp, FP, frame::interpreter_frame_mdx_offset * wordSize); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, _bcp_register); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. 
We can't go back to the + // begining of the ProfileData we intend to update to check its + // type because we're right after it and we don't known its + // length + Label do_profile; + ld_b(tmp, _bcp_register, 0); + addi_d(AT, tmp, -1 * Bytecodes::_invokedynamic); + beqz(AT, do_profile); + addi_d(AT, tmp, -1 * Bytecodes::_invokehandle); + beqz(AT, do_profile); + + get_method(tmp); + ld_b(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); + li(AT, vmIntrinsics::_compiledLambdaForm); + bne(tmp, AT, profile_continue); + + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + add_d(tmp, ret, R0); + profile_obj_type(tmp, mdo_ret_addr); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { + guarantee(T4 == tmp1, "You are reqired to use T4 as the index register for LoongArch !"); + + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + ld_w(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); + blt(tmp1, R0, profile_continue); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. + // mdo start + parameters offset + array length - 1 + add_d(mdp, mdp, tmp1); + ld_d(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); + Address arg_type(mdp, tmp1, per_arg_scale, type_base); + + // load offset on the stack from the slot for this parameter + alsl_d(AT, tmp1, mdp, per_arg_scale - 1); + ld_d(tmp2, AT, off_base); + + sub_d(tmp2, R0, tmp2); + + // read the parameter from the local area + slli_d(AT, tmp2, Interpreter::stackElementScale()); + ldx_d(tmp2, AT, _locals_register); + + // profile the parameter + profile_obj_type(tmp2, arg_type); + + // go to next parameter + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + blt(R0, tmp1, loop); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { + if (state == atos) { + MacroAssembler::verify_oop(reg); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { +} +#endif // !CC_INTERP + + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. 
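+  // Roughly (a sketch, not the generated code):
+  //   if (JvmtiExport::can_post_interpreter_events() && thread->interp_only_mode())
+  //     InterpreterRuntime::post_method_entry();
+  //   if (DTraceMethodProbes)
+  //     SharedRuntime::dtrace_method_entry(thread, method);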
+ Register tempreg = T0; +#ifndef OPT_THREAD + get_thread(T8); +#else + move(T8, TREG); +#endif + if (JvmtiExport::can_post_interpreter_events()) { + Label L; + ld_w(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, L); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + + { + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + //Rthread, + T8, + //Rmethod); + S3); + } +} + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) { + Register tempreg = T0; +#ifndef OPT_THREAD + get_thread(T8); +#else + move(T8, TREG); +#endif + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label skip; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + // For c++ interpreter the result is always stored at a known location in the frame + // template interpreter will leave it on the top of the stack. + NOT_CC_INTERP(push(state);) + ld_w(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, skip); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(skip); + NOT_CC_INTERP(pop(state)); + } + + { + // Dtrace notification + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + NOT_CC_INTERP(push(state)); + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + //Rthread, Rmethod); + T8, S3); + NOT_CC_INTERP(pop(state)); + } +} + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where) { + assert_different_registers(scratch, AT); + + if (!preloaded) { + ld_w(scratch, counter_addr); + } + addi_w(scratch, scratch, increment); + st_w(scratch, counter_addr); + + li(AT, mask); + andr(scratch, scratch, AT); + + if (cond == Assembler::zero) { + beq(scratch, R0, *where); + } else { + unimplemented(); + } +} diff --git a/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp new file mode 100644 index 00000000000..9113da54ff1 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interp_masm_loongarch_64.hpp @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP +#define CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP + +#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assember with interpreter-specific macros + + +class InterpreterMacroAssembler: public MacroAssembler { +#ifndef CC_INTERP + private: + + Register _locals_register; // register that contains the pointer to the locals + Register _bcp_register; // register that contains the bcp + + protected: + // Interpreter specific version of call_VM_base + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments); + + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, bool verifyoop = true); +#endif // CC_INTERP + + public: + // narrow int return value + void narrow(Register result); + + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} + + void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); + void get_4_byte_integer_at_bcp(Register reg, int offset); + + void load_earlyret_value(TosState state); + +#ifdef CC_INTERP + void save_bcp() { /* not needed in c++ interpreter and harmless */ } + void restore_bcp() { /* not needed in c++ interpreter and harmless */ } + + // Helpers for runtime call arguments/results + void get_method(Register reg); + +#else + + // Interpreter-specific registers + void save_bcp() { + st_d(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); + } + + void restore_bcp() { + ld_d(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); + } + + void restore_locals() { + ld_d(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); + } + + // Helpers for runtime call arguments/results + void get_method(Register reg) { + ld_d(reg, FP, frame::interpreter_frame_method_offset * wordSize); + } + + void get_const(Register reg){ + get_method(reg); + ld_d(reg, reg, in_bytes(Method::const_offset())); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ld_d(reg, reg, in_bytes(ConstMethod::constants_offset())); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ld_d(reg, reg, ConstantPool::cache_offset_in_bytes()); + } + + void get_cpool_and_tags(Register cpool, Register tags) { + get_constant_pool(cpool); + ld_d(tags, cpool, ConstantPool::tags_offset_in_bytes()); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); 
+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_method_counters(Register method, Register mcs, Label& skip); + + // load cpool->resolved_references(index); + void load_resolved_reference_at_index(Register result, Register index); + + void pop_ptr( Register r = FSR); + void pop_i( Register r = FSR); + void pop_l( Register r = FSR); + void pop_f(FloatRegister r = FSF); + void pop_d(FloatRegister r = FSF); + + void push_ptr( Register r = FSR); + void push_i( Register r = FSR); + void push_l( Register r = FSR); + void push_f(FloatRegister r = FSF); + void push_d(FloatRegister r = FSF); + + void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } + + void push(Register r ) { ((MacroAssembler*)this)->push(r); } + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + + void empty_expression_stack() { + ld_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // NULL last_sp until next java call + st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + } + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. + //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + void dispatch_only(TosState state); + void dispatch_only_normal(TosState state); + void dispatch_only_noverify(TosState state); + void dispatch_next(TosState state, int step = 0); + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method, Register temp); + + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. This code is also used for + // exception unwindwing. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. + // In earlyReturn case we only want to skip throwing an exception + // and installing an exception. 
+ void remove_activation(TosState state, Register ret_addr, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); +#endif // CC_INTERP + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + +#ifndef CC_INTERP + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where); + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register scratch2, + bool receiver_can_be_null = false); + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register scratch); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register scratch2); + + // Debugging + // only if +VerifyOops && state == atos + void verify_oop(Register reg, TosState state = atos); + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + + void profile_obj_type(Register obj, const Address& mdo_addr); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); +#endif // !CC_INTERP + + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); +}; + +#endif // CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP diff --git a/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp new file mode 100644 index 00000000000..7f253b2d516 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interpreterGenerator_loongarch.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP + + +// Generation of Interpreter +// + friend class AbstractInterpreterGenerator; + + private: + + address generate_normal_entry(bool synchronized); + address generate_native_entry(bool synchronized); + address generate_abstract_entry(void); + address generate_math_entry(AbstractInterpreter::MethodKind kind); + address generate_empty_entry(void); + address generate_accessor_entry(void); + address generate_Reference_get_entry(); + address generate_CRC32_update_entry(); + address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); + void lock_method(void); + void generate_stack_overflow_check(void); + + void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); + void generate_counter_overflow(Label* do_continue); + +#endif // CPU_LOONGARCH_VM_INTERPRETERGENERATOR_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp new file mode 100644 index 00000000000..052eb997e47 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP + +#include "memory/allocation.hpp" + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + unsigned int _num_fp_args; + unsigned int _num_int_args; + int _stack_offset; + + void move(int from_offset, int to_offset); + void box(int from_offset, int to_offset); + void pass_int(); + void pass_long(); + void pass_object(); + void pass_float(); + void pass_double(); + + public: + // Creation + SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); + _num_int_args = (method->is_static() ? 1 : 0); + _num_fp_args = 0; + _stack_offset = 0; + } + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp new file mode 100644 index 00000000000..0c9df4aa711 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interpreterRT_loongarch_64.cpp @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.inline.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Implementation of SignatureHandlerGenerator + +void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { + __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); + __ st_d(temp(), to(), to_offset * longSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { + __ addi_d(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); + __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); + + __ maskeqz(temp(), temp(), AT); + __ st_w(temp(), to(), to_offset * wordSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // generate code to handle arguments + iterate(fingerprint); + // return result handler + __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); + // return + __ jr(RA); + + __ flush(); +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ st_w(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + +// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. 
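+// The 64-bit value is therefore accessed through the second of its two slots,
+// which is why the loads below use Interpreter::local_offset_in_bytes(offset() + 1).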
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ st_d(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + if (_num_int_args < Argument::n_register_parameters - 1) { + Register reg = as_Register(++_num_int_args + RA0->encoding()); + if (_num_int_args == 1) { + assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); + __ addi_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ ld_d(reg, from(), Interpreter::local_offset_in_bytes(offset())); + __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ maskeqz(reg, AT, reg); + } + } else { + __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(offset())); + __ addi_d(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ maskeqz(temp(), AT, temp()); + __ st_d(temp(), to(), _stack_offset); + _stack_offset += wordSize; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + if (_num_fp_args < Argument::n_float_register_parameters) { + __ fld_s(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset())); + } else if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_w(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ ld_w(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ st_w(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + +// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. 
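+// Like pass_float above, doubles prefer a floating-point argument register
+// (FA0 upward), then fall back to a free integer argument register, and only
+// then spill to the outgoing stack slot tracked by _stack_offset.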
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + if (_num_fp_args < Argument::n_float_register_parameters) { + __ fld_d(as_FloatRegister(_num_fp_args++), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else if (_num_int_args < Argument::n_register_parameters - 1) { + __ ld_d(as_Register(++_num_int_args + RA0->encoding()), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld_d(AT, from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ st_d(AT, to(), _stack_offset); + _stack_offset += wordSize; + } +} + + +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +class SlowSignatureHandler + : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _int_args; + intptr_t* _fp_args; + intptr_t* _fp_identifiers; + unsigned int _num_int_args; + unsigned int _num_fp_args; + + virtual void pass_int() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_long() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2 * Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_object() + { + intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + _num_int_args++; + } else { + *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + } + } + + virtual void pass_float() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_fp_args < Argument::n_float_register_parameters) { + *_fp_args++ = from_obj; + _num_fp_args++; + } else if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_double() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_fp_args < Argument::n_float_register_parameters) { + *_fp_args++ = from_obj; + *_fp_identifiers |= (1 << _num_fp_args); // mark as double + _num_fp_args++; + } else if (_num_int_args < Argument::n_register_parameters - 1) { + *_int_args++ = from_obj; + _num_int_args++; + } else { + *_to++ = from_obj; + } + } + + public: + SlowSignatureHandler(methodHandle method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + + // see TemplateInterpreterGenerator::generate_slow_signature_handler() + _int_args = to - (method->is_static() ? 15 : 16); + _fp_args = to - 8; + _fp_identifiers = to - 9; + *(int*) _fp_identifiers = 0; + _num_int_args = (method->is_static() ? 
1 : 0); + _num_fp_args = 0; + } +}; + + +IRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +IRT_END diff --git a/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp new file mode 100644 index 00000000000..c83afbdaf03 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP + + public: + + // Sentinel placed in the code for interpreter returns so + // that i2c adapters and osr code can recognize an interpreter + // return address and convert the return to a specialized + // block of code to handle compiedl return values and cleaning + // the fpu stack. + static const int return_sentinel; + + static Address::ScaleFactor stackElementScale() { + return Address::times_8; + } + + // Offset from sp (which points to the last stack element) + static int expr_offset_in_bytes(int i) { return stackElementSize * i; } + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreterSize to get the VM to print out the size. + // Max size with JVMTI and TaggedStackInterpreter + const static int InterpreterCodeSize = 168 * 1024; +#endif // CPU_LOONGARCH_VM_INTERPRETER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp new file mode 100644 index 00000000000..5a4f102cfd0 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/interpreter_loongarch_64.cpp @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +address AbstractInterpreterGenerator::generate_slow_signature_handler() { + address entry = __ pc(); + // Rmethod: method + // LVP: pointer to locals + // A3: first stack arg + __ move(A3, SP); + __ addi_d(SP, SP, -18 * wordSize); + __ st_d(RA, SP, 0); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::slow_signature_handler), + Rmethod, LVP, A3); + + // V0: result handler + + // Stack layout: + // ... + // 18 stack arg0 <--- old sp + // 17 floatReg arg7 + // ... + // 10 floatReg arg0 + // 9 float/double identifiers + // 8 IntReg arg7 + // ... + // 2 IntReg arg1 + // 1 aligned slot + // SP: 0 return address + + // Do FP first so we can use A3 as temp + __ ld_d(A3, Address(SP, 9 * wordSize)); // float/double identifiers + + for (int i= 0; i < Argument::n_float_register_parameters; i++) { + FloatRegister floatreg = as_FloatRegister(i + FA0->encoding()); + Label isdouble, done; + + __ andi(AT, A3, 1 << i); + __ bnez(AT, isdouble); + __ fld_s(floatreg, SP, (10 + i) * wordSize); + __ b(done); + __ bind(isdouble); + __ fld_d(floatreg, SP, (10 + i) * wordSize); + __ bind(done); + } + + // A0 is for env. + // If the mothed is not static, A1 will be corrected in generate_native_entry. 
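+  // The slots read here were filled on the C++ side by
+  // InterpreterRuntime::slow_signature_handler (see SlowSignatureHandler
+  // earlier in this patch): it stores at most
+  // Argument::n_register_parameters - 1 integer arguments, at most
+  // Argument::n_float_register_parameters FP arguments, and an identifier
+  // word whose bit i is set when FA<i> carries a double
+  // (*_fp_identifiers |= (1 << _num_fp_args) in pass_double()).
+  // That bit is what the andi/bnez pair in the FP loop above tests; the
+  // loop below simply reloads A1..A7 from slots 2..8 of the layout shown.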
+ for (int i= 1; i < Argument::n_register_parameters; i++) { + Register reg = as_Register(i + A0->encoding()); + + __ ld_d(reg, SP, (1 + i) * wordSize); + } + + // A0/V0 contains the result from the call of + // InterpreterRuntime::slow_signature_handler so we don't touch it + // here. It will be loaded with the JNIEnv* later. + __ ld_d(RA, SP, 0); + __ addi_d(SP, SP, 18 * wordSize); + __ jr(RA); + return entry; +} + + +// +// Various method entries +// + +address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + + // Rmethod: methodOop + // V0: scratrch + // Rsender: send 's sp + + if (!InlineIntrinsics) return NULL; // Generate a vanilla entry + + address entry_point = __ pc(); + //guarantee(0, "LA not implemented yet"); + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used. + + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: [ lo(arg) ] <-- sp + // [ hi(arg) ] + { + // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are + // java methods. Interpreter::method_kind(...) will select + // this entry point for the corresponding methods in JDK 1.3. + __ fld_d(FA0, SP, 0 * wordSize); + __ fld_d(FA1, SP, 1 * wordSize); + __ push2(RA, FP); + __ addi_d(FP, SP, 2 * wordSize); + + // [ fp ] <-- sp + // [ ra ] + // [ lo ] <-- fp + // [ hi ] + //FIXME, need consider this + switch (kind) { + case Interpreter::java_lang_math_sin : + __ trigfunc('s'); + break; + case Interpreter::java_lang_math_cos : + __ trigfunc('c'); + break; + case Interpreter::java_lang_math_tan : + __ trigfunc('t'); + break; + case Interpreter::java_lang_math_sqrt: + __ fsqrt_d(F0, FA0); + break; + case Interpreter::java_lang_math_abs: + __ fabs_d(F0, FA0); + break; + case Interpreter::java_lang_math_log: + // Store to stack to convert 80bit precision back to 64bits + break; + case Interpreter::java_lang_math_log10: + // Store to stack to convert 80bit precision back to 64bits + break; + case Interpreter::java_lang_math_pow: + break; + case Interpreter::java_lang_math_exp: + break; + + default : + ShouldNotReachHere(); + } + + // must maintain return value in F0:F1 + __ ld_d(RA, FP, (-1) * wordSize); + //FIXME + __ ld_d(FP, FP, (-2) * wordSize); + __ move(SP, Rsender); + __ jr(RA); + } + return entry_point; +} + + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +address InterpreterGenerator::generate_abstract_entry(void) { + + // Rmethod: methodOop + // V0: receiver (unused) + // Rsender : sender 's sp + address entry_point = __ pc(); + + // abstract method entry + // throw exception + // adjust stack to what a normal return would do + __ empty_expression_stack(); + __ restore_bcp(); + __ restore_locals(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + return entry_point; +} + + +// Empty method, generate a very fast return. 
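+// (Only emitted when UseFastEmptyMethods is on: the entry below tests the
+//  safepoint state word and, if a safepoint is pending, branches to a slow
+//  path that is just the normal method entry emitted in place by
+//  generate_normal_entry(false).)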
+ +address InterpreterGenerator::generate_empty_entry(void) { + + // Rmethod: methodOop + // V0: receiver (unused) + // Rsender: sender 's sp, must set sp to this value on return, on LoongArch, now use T0, as it right? + if (!UseFastEmptyMethods) return NULL; + + address entry_point = __ pc(); + //TODO: LA + //guarantee(0, "LA not implemented yet"); + Label slow_path; + __ li(RT0, SafepointSynchronize::address_of_state()); + __ ld_w(AT, RT0, 0); + __ li(RT0, (SafepointSynchronize::_not_synchronized)); + __ bne(AT, RT0,slow_path); + __ move(SP, Rsender); + __ jr(RA); + __ bind(slow_path); + (void) generate_normal_entry(false); + return entry_point; + +} + +void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { + + // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in + // the days we had adapter frames. When we deoptimize a situation where a + // compiled caller calls a compiled caller will have registers it expects + // to survive the call to the callee. If we deoptimize the callee the only + // way we can restore these registers is to have the oldest interpreter + // frame that we create restore these values. That is what this routine + // will accomplish. + + // At the moment we have modified c2 to not have any callee save registers + // so this problem does not exist and this routine is just a place holder. + + assert(f->is_interpreted_frame(), "must be interpreted"); +} diff --git a/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp new file mode 100644 index 00000000000..de97de58044 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/javaFrameAnchor_loongarch.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + // fence? 
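+    // Clearing sp first keeps the anchor self-consistent for observers such
+    // as the profiler: a frame walk only trusts _last_Java_fp/_last_Java_pc
+    // while _last_Java_sp is non-NULL (compare the ordering comment in
+    // copy() below).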
+ _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + if (_last_Java_sp != src->_last_Java_sp) + _last_Java_sp = NULL; + + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + // Always walkable + bool walkable(void) { return true; } + // Never any thing to do since we are always walkable and can find address of return addresses + void make_walkable(JavaThread* thread) { } + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + +private: + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + // Assert (last_Java_sp == NULL || fp == NULL) + void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } + +#endif // CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp new file mode 100644 index 00000000000..5b52e54e080 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/jniFastGetField_loongarch_64.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#define BUFFER_SIZE 30*wordSize + +// Instead of issuing lfence for LoadLoad barrier, we create data dependency +// between loads, which is more efficient than lfence. + +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name = NULL; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + Label slow; + + // return pc RA + // jni env A0 + // obj A1 + // jfieldID A2 + + address counter_addr = SafepointSynchronize::safepoint_counter_addr(); + __ li(AT, (long)counter_addr); + __ ld_w(T1, AT, 0); + + // Parameters(A0~A3) should not be modified, since they will be used in slow path + __ andi(AT, T1, 1); + __ bne(AT, R0, slow); + + __ move(T0, A1); + __ clear_jweak_tag(T0); + + __ ld_d(T0, T0, 0); // unbox, *obj + __ srli_d(T2, A2, 2); // offset + __ add_d(T0, T0, T2); + + __ li(AT, (long)counter_addr); + __ ld_w(AT, AT, 0); + __ bne(T1, AT, slow); + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + switch (type) { + case T_BOOLEAN: __ ld_bu (V0, T0, 0); break; + case T_BYTE: __ ld_b (V0, T0, 0); break; + case T_CHAR: __ ld_hu (V0, T0, 0); break; + case T_SHORT: __ ld_h (V0, T0, 0); break; + case T_INT: __ ld_w (V0, T0, 0); break; + case T_LONG: __ ld_d (V0, T0, 0); break; + case T_FLOAT: __ fld_s (F0, T0, 0); break; + case T_DOUBLE: __ fld_d (F0, T0, 0); break; + default: ShouldNotReachHere(); + } + + __ jr(RA); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind (slow); + address slow_case_addr = NULL; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + } + __ jmp(slow_case_addr); + + __ flush (); + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return 
generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_int_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_int_field0(T_DOUBLE); +} diff --git a/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp new file mode 100644 index 00000000000..554ff216ac2 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/jniTypes_loongarch.hpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP + +#include "memory/allocation.hpp" +#include "oops/oop.hpp" +#include "prims/jni.h" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +private: + + // 32bit Helper routines. + static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; + *(jint *)(to ) = from[0]; } + static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } + +public: + // In LoongArch64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] + // is 8 bytes. + // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. 
+ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. + // This error occurs in ReflectInvoke.java + // The parameter of DD(int) should be 4 instead of 0x550000004. + // + // See: [runtime/javaCalls.hpp] + + static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + *(jlong*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + *(jlong*) (to + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + *(jlong*) (to + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 0 + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + *(jdouble*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + *(jdouble*) (to + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + *(jdouble*) (to + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. 
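+  // Worked example of the conventions above (values are illustrative only):
+  //   intptr_t buf[4]; int pos = 0;
+  //   put_int(42, buf, pos);        // whole slot 0 is written, pos -> 1
+  //   put_long((jlong)7, buf, pos); // slots 1 and 2 both hold 7, pos -> 3
+  //   get_int(&buf[0]);             // 42
+  //   get_long(&buf[1]);            // 7 (_JNI_SLOT_OFFSET is 0 here)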
+ static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_LOONGARCH_VM_JNITYPES_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/jni_loongarch.h b/hotspot/src/cpu/loongarch/vm/jni_loongarch.h new file mode 100644 index 00000000000..eb25cbc3546 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/jni_loongarch.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef _JAVASOFT_JNI_MD_H_ +#define _JAVASOFT_JNI_MD_H_ + +// Note: please do not change these without also changing jni_md.h in the JDK +// repository +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) + #define JNIEXPORT __attribute__((visibility("default"))) + #define JNIIMPORT __attribute__((visibility("default"))) +#else + #define JNIEXPORT + #define JNIIMPORT +#endif + +#define JNICALL + +typedef int jint; + + typedef long jlong; + +typedef signed char jbyte; + +#endif diff --git a/hotspot/src/cpu/loongarch/vm/loongarch.ad b/hotspot/src/cpu/loongarch/vm/loongarch.ad new file mode 100644 index 00000000000..48c44779e71 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/loongarch.ad @@ -0,0 +1,24 @@ +// +// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
diff --git a/hotspot/src/cpu/loongarch/vm/loongarch_64.ad b/hotspot/src/cpu/loongarch/vm/loongarch_64.ad
new file mode 100644
index 00000000000..fa4bf6e1703
--- /dev/null
+++ b/hotspot/src/cpu/loongarch/vm/loongarch_64.ad
@@ -0,0 +1,12861 @@
+//
+// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// GodSon3 Architecture Description File
+
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// architecture.
+ +// format: +// reg_def name (call convention, c-call convention, ideal type, encoding); +// call convention : +// NS = No-Save +// SOC = Save-On-Call +// SOE = Save-On-Entry +// AS = Always-Save +// ideal type : +// see opto/opcodes.hpp for more info +// reg_class name (reg, ...); +// alloc_class name (reg, ...); +register %{ + +// General Registers +// Integer Registers + reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); + reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); + reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); + // TODO: LA + reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); + reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); + reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); + reg_def SP_H ( NS, NS, Op_RegI, 3, SP->as_VMReg()->next()); + reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); + reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); + reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); + reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); + reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); + reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); + reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); + reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); + reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); + reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); + reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); + reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); + reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); + reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); + reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); + reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); + reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); + reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); + reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); + reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); + reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); + reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); + reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); + reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); + reg_def T4 (SOC, SOC, Op_RegI, 16, T4->as_VMReg()); + reg_def T4_H (SOC, SOC, Op_RegI, 16, T4->as_VMReg()->next()); + reg_def T5 (SOC, SOC, Op_RegI, 17, T5->as_VMReg()); + reg_def T5_H (SOC, SOC, Op_RegI, 17, T5->as_VMReg()->next()); + reg_def T6 (SOC, SOC, Op_RegI, 18, T6->as_VMReg()); + reg_def T6_H (SOC, SOC, Op_RegI, 18, T6->as_VMReg()->next()); + reg_def T7 (SOC, SOC, Op_RegI, 19, T7->as_VMReg()); + reg_def T7_H (SOC, SOC, Op_RegI, 19, T7->as_VMReg()->next()); + reg_def T8 (SOC, SOC, Op_RegI, 20, T8->as_VMReg()); + reg_def T8_H (SOC, SOC, Op_RegI, 20, T8->as_VMReg()->next()); + reg_def RX ( NS, NS, Op_RegI, 21, RX->as_VMReg()); + reg_def RX_H ( NS, NS, Op_RegI, 21, RX->as_VMReg()->next()); + reg_def FP ( NS, NS, Op_RegI, 22, FP->as_VMReg()); + reg_def FP_H ( NS, NS, Op_RegI, 22, FP->as_VMReg()->next()); + reg_def S0 (SOC, SOE, Op_RegI, 23, S0->as_VMReg()); + reg_def S0_H (SOC, SOE, Op_RegI, 23, S0->as_VMReg()->next()); + reg_def S1 (SOC, SOE, Op_RegI, 24, S1->as_VMReg()); + reg_def S1_H (SOC, SOE, Op_RegI, 24, S1->as_VMReg()->next()); + reg_def S2 (SOC, SOE, Op_RegI, 25, S2->as_VMReg()); + reg_def S2_H (SOC, SOE, Op_RegI, 25, S2->as_VMReg()->next()); + reg_def S3 (SOC, SOE, Op_RegI, 26, S3->as_VMReg()); + reg_def S3_H (SOC, SOE, Op_RegI, 26, S3->as_VMReg()->next()); + reg_def S4 (SOC, SOE, Op_RegI, 27, S4->as_VMReg()); + reg_def S4_H 
(SOC, SOE, Op_RegI, 27, S4->as_VMReg()->next()); + reg_def S5 (SOC, SOE, Op_RegI, 28, S5->as_VMReg()); + reg_def S5_H (SOC, SOE, Op_RegI, 28, S5->as_VMReg()->next()); + reg_def S6 (SOC, SOE, Op_RegI, 29, S6->as_VMReg()); + reg_def S6_H (SOC, SOE, Op_RegI, 29, S6->as_VMReg()->next()); + reg_def S7 (SOC, SOE, Op_RegI, 30, S7->as_VMReg()); + reg_def S7_H (SOC, SOE, Op_RegI, 30, S7->as_VMReg()->next()); + // TODO: LA + reg_def S8 ( NS, NS, Op_RegI, 31, S8->as_VMReg()); + reg_def S8_H ( NS, NS, Op_RegI, 31, S8->as_VMReg()->next()); + + +// Floating/Vector registers. +reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); +reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); +reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); +reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); +reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); +reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); +reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); +reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); + +reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); +reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); +reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); +reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); +reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); +reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); +reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); +reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); + +reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); +reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); +reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); +reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); +reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); +reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); +reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); +reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); + +reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); +reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); +reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); +reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); +reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); +reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); +reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); +reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); + +reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); +reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); +reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); +reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); +reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); +reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); +reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); +reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); + +reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); +reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); +reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); +reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); +reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); +reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); +reg_def F5_N ( SOC, SOC, Op_RegF, 5, 
F5->as_VMReg()->next(6) ); +reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); + +reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); +reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); +reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); +reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); +reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); +reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); +reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); +reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); + +reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); +reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); +reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); +reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); +reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); +reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); +reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); +reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); + +reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); +reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); +reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); +reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); +reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); +reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); +reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); +reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); + +reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); +reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); +reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); +reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); +reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); +reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); +reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); +reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); + +reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); +reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); +reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); +reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); +reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); +reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); +reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); +reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); + +reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); +reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); +reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); +reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); +reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); +reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); +reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); +reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); + +reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); +reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); +reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); +reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); +reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); +reg_def F12_M ( SOC, 
SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); +reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); +reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); + +reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); +reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); +reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); +reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); +reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); +reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); +reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); +reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); + +reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); +reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); +reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); +reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); +reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); +reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); +reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); +reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); + +reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); +reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); +reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); +reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); +reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); +reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); +reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); +reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); + +reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); +reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); +reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); +reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); +reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); +reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); +reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); +reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); + +reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); +reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); +reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); +reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); +reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); +reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); +reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); +reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); + +reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); +reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); +reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); +reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); +reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); +reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); +reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); +reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); + +reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); +reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); +reg_def F19_J ( SOC, SOC, Op_RegF, 19, 
F19->as_VMReg()->next(2) ); +reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); +reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); +reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); +reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); +reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); + +reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); +reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); +reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); +reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); +reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); +reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); +reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); +reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); + +reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); +reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); +reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); +reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); +reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); +reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); +reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); +reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); + +reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); +reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); +reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); +reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); +reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); +reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); +reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); +reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); + +reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); +reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); +reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); +reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); +reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); +reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); +reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); +reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); + +reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg() ); +reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next() ); +reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); +reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); +reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); +reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); +reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); +reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); + +reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg() ); +reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next() ); +reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); +reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); +reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); +reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); +reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); +reg_def F25_O ( SOC, SOC, Op_RegF, 25, 
F25->as_VMReg()->next(7) ); + +reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg() ); +reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next() ); +reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); +reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); +reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); +reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); +reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); +reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); + +reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg() ); +reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next() ); +reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); +reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); +reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); +reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); +reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); +reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); + +reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg() ); +reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next() ); +reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); +reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); +reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); +reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); +reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); +reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); + +reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg() ); +reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next() ); +reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); +reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); +reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); +reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); +reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); +reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); + +reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg() ); +reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next() ); +reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); +reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); +reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); +reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); +reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); +reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); + +reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg() ); +reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next() ); +reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); +reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); +reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); +reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); +reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); +reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); + + +// ---------------------------- +// Special Registers +//S6 is used for get_thread(S6) +//S5 is uesd for heapbase of compressed oop +alloc_class chunk0( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S5, S5_H, + S6, S6_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T8, T8_H, + T4, T4_H, + T1, T1_H, // 
inline_cache_reg + T6, T6_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + T5, T5_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + S8, S8_H + RA, RA_H, + SP, SP_H, // stack_pointer + FP, FP_H // frame_pointer + ); + +// F23 is scratch reg +alloc_class chunk1( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, + F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, + F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, + F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, + F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, + F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, + F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, + F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, + F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, + F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, + F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, + F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, + F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, + F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, + F14, F14_H, F14_J, F14_K, F14_L, F14_M, F14_N, F14_O, + F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, + F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, + F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, + F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, + F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, + F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, + F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, + F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, + F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, + F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, + F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, + F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, + F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, + F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, + F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, + F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); + +reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); +reg_class s0_reg( S0 ); +reg_class s1_reg( S1 ); +reg_class s2_reg( S2 ); +reg_class s3_reg( S3 ); +reg_class s4_reg( S4 ); +reg_class s5_reg( S5 ); +reg_class s6_reg( S6 ); +reg_class s7_reg( S7 ); + +reg_class t_reg( T0, T1, T2, T3, T8, T4 ); +reg_class t0_reg( T0 ); +reg_class t1_reg( T1 ); +reg_class t2_reg( T2 ); +reg_class t3_reg( T3 ); +reg_class t8_reg( T8 ); +reg_class t4_reg( T4 ); + +reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); +reg_class a0_reg( A0 ); +reg_class a1_reg( A1 ); +reg_class a2_reg( A2 ); +reg_class a3_reg( A3 ); +reg_class a4_reg( A4 ); +reg_class a5_reg( A5 ); +reg_class a6_reg( A6 ); +reg_class a7_reg( A7 ); + +// TODO: LA +//reg_class v0_reg( A0 ); +//reg_class v1_reg( A1 ); + +reg_class sp_reg( SP, SP_H ); +reg_class fp_reg( FP, FP_H ); + +reg_class v0_long_reg( A0, A0_H ); +reg_class v1_long_reg( A1, A1_H ); +reg_class a0_long_reg( A0, A0_H ); +reg_class a1_long_reg( A1, A1_H ); +reg_class a2_long_reg( A2, A2_H ); +reg_class a3_long_reg( A3, A3_H ); +reg_class a4_long_reg( A4, A4_H ); +reg_class a5_long_reg( A5, A5_H ); +reg_class a6_long_reg( A6, A6_H ); +reg_class a7_long_reg( A7, A7_H ); +reg_class t0_long_reg( T0, T0_H ); +reg_class t1_long_reg( T1, T1_H ); +reg_class t2_long_reg( T2, T2_H ); +reg_class t3_long_reg( T3, T3_H ); +reg_class t8_long_reg( T8, T8_H ); +reg_class t4_long_reg( T4, T4_H ); +reg_class s0_long_reg( S0, S0_H ); +reg_class s1_long_reg( S1, S1_H ); +reg_class s2_long_reg( S2, S2_H ); +reg_class s3_long_reg( S3, S3_H ); +reg_class s4_long_reg( S4, S4_H ); +reg_class s5_long_reg( 
S5, S5_H ); +reg_class s6_long_reg( S6, S6_H ); +reg_class s7_long_reg( S7, S7_H ); + +reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, A4, T5, A3, A2, A1, A0, T0 ); + +reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); + +reg_class p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T8, T8_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class no_T8_p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class no_Ax_p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + T0, T0_H + ); + +reg_class long_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T8, T8_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + + +// Floating point registers. +// F31 are not used as temporary registers in D2I +reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); +reg_class dbl_reg( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F12, F12_H, + F13, F13_H, + F14, F14_H, + F15, F15_H, + F16, F16_H, + F17, F17_H, + F18, F18_H, + F19, F19_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F29, F29_H, + F30, F30_H, + F31, F31_H); + +// Class for all 128bit vector registers +reg_class vectorx_reg( F0, F0_H, F0_J, F0_K, + F1, F1_H, F1_J, F1_K, + F2, F2_H, F2_J, F2_K, + F3, F3_H, F3_J, F3_K, + F4, F4_H, F4_J, F4_K, + F5, F5_H, F5_J, F5_K, + F6, F6_H, F6_J, F6_K, + F7, F7_H, F7_J, F7_K, + F8, F8_H, F8_J, F8_K, + F9, F9_H, F9_J, F9_K, + F10, F10_H, F10_J, F10_K, + F11, F11_H, F11_J, F11_K, + F12, F12_H, F12_J, F12_K, + F13, F13_H, F13_J, F13_K, + F14, F14_H, F14_J, F14_K, + F15, F15_H, F15_J, F15_K, + F16, F16_H, F16_J, F16_K, + F17, F17_H, F17_J, F17_K, + F18, F18_H, F18_J, F18_K, + F19, F19_H, F19_J, F19_K, + F20, F20_H, F20_J, F20_K, + F21, F21_H, F21_J, F21_K, + F22, F22_H, F22_J, F22_K, + F24, F24_H, F24_J, F24_K, + F25, F25_H, F25_J, F25_K, + F26, F26_H, F26_J, F26_K, + F27, F27_H, F27_J, F27_K, + F28, F28_H, F28_J, F28_K, + F29, F29_H, F29_J, F29_K, + F30, F30_H, F30_J, F30_K, + F31, F31_H, F31_J, F31_K); + +// Class for all 256bit vector registers +reg_class vectory_reg( F0, F0_H, F0_J, F0_K, F0_L, F0_M, F0_N, F0_O, + F1, F1_H, F1_J, F1_K, F1_L, F1_M, F1_N, F1_O, + F2, F2_H, F2_J, F2_K, F2_L, F2_M, F2_N, F2_O, + F3, F3_H, F3_J, F3_K, F3_L, F3_M, F3_N, F3_O, + F4, F4_H, F4_J, F4_K, F4_L, F4_M, F4_N, F4_O, + F5, F5_H, F5_J, F5_K, F5_L, F5_M, F5_N, F5_O, + F6, F6_H, F6_J, F6_K, F6_L, F6_M, F6_N, F6_O, + F7, F7_H, F7_J, F7_K, F7_L, F7_M, F7_N, F7_O, + F8, F8_H, F8_J, F8_K, F8_L, F8_M, F8_N, F8_O, + F9, F9_H, F9_J, F9_K, F9_L, F9_M, F9_N, F9_O, + F10, F10_H, F10_J, F10_K, F10_L, F10_M, F10_N, F10_O, + F11, F11_H, F11_J, F11_K, F11_L, F11_M, F11_N, F11_O, + F12, F12_H, F12_J, F12_K, F12_L, F12_M, F12_N, F12_O, + F13, F13_H, F13_J, F13_K, F13_L, F13_M, F13_N, F13_O, + F14, F14_H, F14_J, F14_K, 
F14_L, F14_M, F14_N, F14_O, + F15, F15_H, F15_J, F15_K, F15_L, F15_M, F15_N, F15_O, + F16, F16_H, F16_J, F16_K, F16_L, F16_M, F16_N, F16_O, + F17, F17_H, F17_J, F17_K, F17_L, F17_M, F17_N, F17_O, + F18, F18_H, F18_J, F18_K, F18_L, F18_M, F18_N, F18_O, + F19, F19_H, F19_J, F19_K, F19_L, F19_M, F19_N, F19_O, + F20, F20_H, F20_J, F20_K, F20_L, F20_M, F20_N, F20_O, + F21, F21_H, F21_J, F21_K, F21_L, F21_M, F21_N, F21_O, + F22, F22_H, F22_J, F22_K, F22_L, F22_M, F22_N, F22_O, + F24, F24_H, F24_J, F24_K, F24_L, F24_M, F24_N, F24_O, + F25, F25_H, F25_J, F25_K, F25_L, F25_M, F25_N, F25_O, + F26, F26_H, F26_J, F26_K, F26_L, F26_M, F26_N, F26_O, + F27, F27_H, F27_J, F27_K, F27_L, F27_M, F27_N, F27_O, + F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, + F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, + F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, + F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); + +// TODO: LA +//reg_class flt_arg0( F0 ); +//reg_class dbl_arg0( F0, F0_H ); +//reg_class dbl_arg1( F1, F1_H ); + +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define name --> value mappings to inform the ADLC of an integer valued name +// Current support includes integer values in the range [0, 0x7FFFFFFF] +// Format: +// int_def ( , ); +// Generated Code in ad_.hpp +// #define () +// // value == +// Generated code in ad_.cpp adlc_verification() +// assert( == , "Expect () to equal "); +// +definitions %{ + int_def DEFAULT_COST ( 100, 100); + int_def HUGE_COST (1000000, 1000000); + + // Memory refs are twice as expensive as run-of-the-mill. + int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); + + // Branches are even more expensive. + int_def BRANCH_COST ( 300, DEFAULT_COST * 3); + // we use jr instruction to construct call, so more expensive + int_def CALL_COST ( 500, DEFAULT_COST * 5); +/* + int_def EQUAL ( 1, 1 ); + int_def NOT_EQUAL ( 2, 2 ); + int_def GREATER ( 3, 3 ); + int_def GREATER_EQUAL ( 4, 4 ); + int_def LESS ( 5, 5 ); + int_def LESS_EQUAL ( 6, 6 ); +*/ +%} + + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, functions, and +// definitions necessary in the rest of the architecture description + +source_hpp %{ +// Header information of the source block. +// Method declarations/definitions which are used outside +// the ad-scope can conveniently be defined here. +// +// To keep related declarations/definitions/uses close together, +// we switch between source %{ }% and source_hpp %{ }% freely as needed. + +class CallStubImpl { + + //-------------------------------------------------------------- + //---< Used for optimization in Compile::shorten_branches >--- + //-------------------------------------------------------------- + + public: + // Size of call trampoline stub. + static uint size_call_trampoline() { + return 0; // no call trampolines on this platform + } + + // number of relocations needed by a call trampoline stub + static uint reloc_call_trampoline() { + return 0; // no call trampolines on this platform + } +}; + +class HandlerImpl { + + public: + + static int emit_exception_handler(CodeBuffer &cbuf); + static int emit_deopt_handler(CodeBuffer& cbuf); + + static uint size_exception_handler() { + // NativeCall instruction size is the same as NativeJump. + // exception handler starts out as jump and can be patched to + // a call be deoptimization. 
(4932387) + // Note that this value is also credited (in output.cpp) to + // the size of the code section. + int size = NativeFarCall::instruction_size; + return round_to(size, 16); + } + + static uint size_deopt_handler() { + int size = NativeFarCall::instruction_size; + return round_to(size, 16); + } +}; + +%} // end source_hpp + +source %{ + +#define NO_INDEX 0 +#define RELOC_IMM64 Assembler::imm_operand +#define RELOC_DISP32 Assembler::disp32_operand + +#define V0_num A0_num +#define V0_H_num A0_H_num + +#define __ _masm. + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Emit exception handler code. +// Stuff framesize into a register and call a VM stub routine. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_exception_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + +// Emit deopt handler code. +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_deopt_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call(SharedRuntime::deopt_blob()->unpack()); + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + return true; // Per default match rules are supported. +} + +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + const int safety_zone = 3 * BytesPerInstWord; + int offs = offset - br_size + 4; + // To be conservative on LoongArch + // branch node should be end with: + // branch inst + offs = (offs < 0 ? 
offs - safety_zone : offs + safety_zone) >> 2; + switch (rule) { + case jmpDir_long_rule: + case jmpDir_short_rule: + return Assembler::is_simm(offs, 26); + case jmpCon_flags_long_rule: + case jmpCon_flags_short_rule: + case branchConP_0_long_rule: + case branchConP_0_short_rule: + case branchConN2P_0_long_rule: + case branchConN2P_0_short_rule: + case cmpN_null_branch_long_rule: + case cmpN_null_branch_short_rule: + case branchConIU_reg_immI_0_long_rule: + case branchConIU_reg_immI_0_short_rule: + case branchConF_reg_reg_long_rule: + case branchConF_reg_reg_short_rule: + case branchConD_reg_reg_long_rule: + case branchConD_reg_reg_short_rule: + return Assembler::is_simm(offs, 21); + default: + return Assembler::is_simm(offs, 16); + } + return false; +} + + +// No additional cost for CMOVL. +const int Matcher::long_cmove_cost() { return 0; } + +// No CMOVF/CMOVD with SSE2 +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } + +// Does the CPU require late expand (see block.cpp for description of late expand)? +const bool Matcher::require_postalloc_expand = false; + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? True for Intel but false for most RISCs +const bool Matcher::clone_shift_expressions = false; + +// Do we need to mask the count passed to shift instructions or does +// the cpu only look at the lower 5/6 bits anyway? +const bool Matcher::need_masked_shift_count = false; + +bool Matcher::narrow_oop_use_complex_address() { + assert(UseCompressedOops, "only for compressed oops code"); + return false; +} + +bool Matcher::narrow_klass_use_complex_address() { + assert(UseCompressedClassPointers, "only for compressed klass code"); + return false; +} + +// This is UltraSparc specific, true just means we have fast l2f conversion +const bool Matcher::convL2FSupported(void) { + return true; +} + +// Vector ideal reg +const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); + switch(size) { + case 16: return Op_VecX; + case 32: return Op_VecY; + } + ShouldNotReachHere(); + return 0; +} + +// Only lowest bits of xmm reg are used for vector shift count. +const uint Matcher::vector_shift_count_ideal_reg(int size) { + assert(MaxVectorSize == 16 || MaxVectorSize == 32, ""); + switch(size) { + case 16: return Op_VecX; + case 32: return Op_VecY; + } + ShouldNotReachHere(); + return 0; +} + +// Max vector size in bytes. 0 if not supported. +const int Matcher::vector_width_in_bytes(BasicType bt) { + return (int)MaxVectorSize; +} + +// Limits on vector size (number of elements) loaded into vector. +const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + int max_size = max_vector_size(bt); + int size = 0; + + if (UseLSX) size = 16; + size = size / type2aelembytes(bt); + return MIN2(size,max_size); +} + +// LoongArch supports misaligned vectors store/load? 
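The Matcher::is_short_branch_offset check above converts the byte displacement into a word (instruction) offset and pads it with a three-instruction safety zone before testing it against the signed immediate field of the branch encoding: 26 bits for plain jumps, 21 bits for the compare-and-branch forms, and 16 bits otherwise. The following standalone sketch only illustrates that arithmetic; is_simm here is a local stand-in for Assembler::is_simm and the sample values are invented.

  #include <cstdint>
  #include <cstdio>

  // Local stand-in mirroring the shape of Assembler::is_simm.
  static bool is_simm(int64_t value, unsigned bits) {
    const int64_t lo = -(int64_t(1) << (bits - 1));
    const int64_t hi =  (int64_t(1) << (bits - 1)) - 1;
    return value >= lo && value <= hi;
  }

  // Conservative reach test for a branch with byte displacement 'offset'
  // and node size 'br_size' (illustrative values only).
  static bool fits_short_branch(int offset, int br_size, unsigned field_bits) {
    const int BytesPerInstWord = 4;
    const int safety_zone = 3 * BytesPerInstWord;
    int offs = offset - br_size + 4;
    // Widen by the safety zone, then convert bytes to a word offset.
    offs = (offs < 0 ? offs - safety_zone : offs + safety_zone) >> 2;
    return is_simm(offs, field_bits);
  }

  int main() {
    // A +1 MiB displacement fits a 26-bit field but not a 16-bit one.
    printf("%d %d\n", fits_short_branch(1 << 20, 4, 26),
                      fits_short_branch(1 << 20, 4, 16));
    return 0;
  }

Keeping the test conservative means a branch is only claimed to be short when it still reaches after small late-stage size changes.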
+const bool Matcher::misaligned_vectors_ok() { + return false; + //return !AlignVector; // can be changed by flag +} + +// Register for DIVI projection of divmodI +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +int Matcher::regnum_to_fpu_offset(int regnum) { + return regnum - 32; // The FP registers are in the second chunk +} + + +const bool Matcher::isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + return true; +} + + +// Return whether or not this register is ever used as an argument. This +// function is used on startup to build the trampoline stubs in generateOptoStub. +// Registers not mentioned will be killed by the VM call in the trampoline, and +// arguments in those registers not be available to the callee. +bool Matcher::can_be_java_arg( int reg ) { + // Refer to: [sharedRuntime_loongarch_64.cpp] SharedRuntime::java_calling_convention() + if ( reg == T0_num || reg == T0_H_num + || reg == A0_num || reg == A0_H_num + || reg == A1_num || reg == A1_H_num + || reg == A2_num || reg == A2_H_num + || reg == A3_num || reg == A3_H_num + || reg == A4_num || reg == A4_H_num + || reg == A5_num || reg == A5_H_num + || reg == A6_num || reg == A6_H_num + || reg == A7_num || reg == A7_H_num ) + return true; + + if ( reg == F0_num || reg == F0_H_num + || reg == F1_num || reg == F1_H_num + || reg == F2_num || reg == F2_H_num + || reg == F3_num || reg == F3_H_num + || reg == F4_num || reg == F4_H_num + || reg == F5_num || reg == F5_H_num + || reg == F6_num || reg == F6_H_num + || reg == F7_num || reg == F7_H_num ) + return true; + + return false; +} + +bool Matcher::is_spillable_arg( int reg ) { + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { + return false; +} + +// Register for MODL projection of divmodL +RegMask Matcher::modL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return FP_REG_mask(); +} + +// LoongArch doesn't support AES intrinsics +const bool Matcher::pass_original_key_for_aes() { + return false; +} + +int CallStaticJavaDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallLeafDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallRuntimeDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +// If CPU can load and store mis-aligned doubles directly then no fixup is +// needed. Else we split the double into 2 integer pieces and move it +// piece-by-piece. Only happens when passing doubles into C code as the +// Java calling convention forces doubles to be aligned. 
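The compute_padding overrides above all share one calculation: round the current code offset up to the node's required alignment and emit the difference as padding. A minimal self-contained illustration of that arithmetic (round_to is a local helper here, not HotSpot's):

  #include <cassert>
  #include <cstdio>

  // Local stand-in: round 'x' up to a power-of-two alignment.
  static int round_to(int x, int alignment) {
    assert((alignment & (alignment - 1)) == 0 && "alignment must be a power of two");
    return (x + alignment - 1) & ~(alignment - 1);
  }

  static int compute_padding(int current_offset, int alignment_required) {
    return round_to(current_offset, alignment_required) - current_offset;
  }

  int main() {
    // A call site at byte offset 20 that must start on a 16-byte boundary
    // needs 12 bytes of padding, i.e. three 4-byte NOPs.
    printf("%d\n", compute_padding(20, 16)); // prints 12
    return 0;
  }

With fixed 4-byte instructions the returned padding is always a whole number of NOPs.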
+const bool Matcher::misaligned_doubles_ok = false;
+// Do floats take an entire double register or just half?
+//const bool Matcher::float_in_double = true;
+bool Matcher::float_in_double() { return false; }
+// Threshold size for cleararray.
+const int Matcher::init_array_short_size = 8 * BytesPerLong;
+// Do ints take an entire long register or just half?
+const bool Matcher::int_in_long = true;
+// Is it better to copy float constants, or load them directly from memory?
+// Intel can load a float constant from a direct address, requiring no
+// extra registers. Most RISCs will have to materialize an address into a
+// register first, so they would do better to copy the constant from stack.
+const bool Matcher::rematerialize_float_constants = false;
+// Advertise here if the CPU requires explicit rounding operations
+// to implement the UseStrictFP mode.
+const bool Matcher::strict_fp_requires_explicit_rounding = false;
+// false => size gets scaled to BytesPerLong, ok.
+const bool Matcher::init_array_count_is_in_bytes = false;
+
+// Indicate if the safepoint node needs the polling page as an input.
+// The polling page address is materialized into a register by
+// MachEpilogNode::emit, so the safepoint node does not take it as an input.
+bool SafePointNode::needs_polling_address_input() {
+  return false;
+}
+
+// !!!!! Special hack to get all types of calls to specify the byte offset
+//       from the start of the call to the point where the return address
+//       will point.
+int MachCallStaticJavaNode::ret_addr_offset() {
+  // bl
+  return NativeCall::instruction_size;
+}
+
+int MachCallDynamicJavaNode::ret_addr_offset() {
+  // lu12i_w IC_Klass,
+  // ori IC_Klass,
+  // lu32i_d IC_Klass
+  // lu52i_d IC_Klass
+
+  // bl
+  return NativeMovConstReg::instruction_size + NativeCall::instruction_size;
+}
+
+//=============================================================================
+
+// Figure out which register class each belongs in: rc_int, rc_float, rc_stack
+enum RC { rc_bad, rc_int, rc_float, rc_stack };
+static enum RC rc_class(OptoReg::Name reg) {
+  if (!OptoReg::is_valid(reg)) return rc_bad;
+  if (OptoReg::is_stack(reg)) return rc_stack;
+  VMReg r = OptoReg::as_VMReg(reg);
+  if (r->is_Register()) return rc_int;
+  assert(r->is_FloatRegister(), "must be");
+  return rc_float;
+}
+
+// Helper methods for MachSpillCopyNode::implementation().
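The spill-copy helpers that follow first classify each half of the source and destination as stack, integer register, or float register, then dispatch on the (source class, destination class) pair; a 64-bit move is recognized when the two halves form an even-aligned, adjacent pair. Below is a simplified standalone sketch of that classification and pairing test, using an invented register numbering purely for illustration.

  #include <cstdio>

  enum RC { rc_bad, rc_int, rc_float, rc_stack };

  // Toy register numbering for illustration only: 0..31 are GPR halves,
  // 32..95 are FPR halves, anything >= 96 is a stack slot.
  static RC rc_class(int reg) {
    if (reg < 0)   return rc_bad;
    if (reg >= 96) return rc_stack;
    return (reg < 32) ? rc_int : rc_float;
  }

  // A 64-bit value occupies an even-aligned, adjacent pair of slots.
  static bool is_64bit_pair(int first, int second) {
    return (first & 1) == 0 && first + 1 == second;
  }

  int main() {
    printf("src class %d, dst class %d, 64-bit? %d\n",
           rc_class(4), rc_class(100), is_64bit_pair(4, 5));
    return 0;
  }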
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, + int src_hi, int dst_hi, uint ireg, outputStream* st) { + int size = 0; + if (cbuf) { + MacroAssembler _masm(cbuf); + int offset = __ offset(); + switch (ireg) { + case Op_VecX: + __ vori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); + break; + case Op_VecY: + __ xvori_b(as_FloatRegister(Matcher::_regEncode[dst_lo]), as_FloatRegister(Matcher::_regEncode[src_lo]), 0); + break; + default: + ShouldNotReachHere(); + } +#ifndef PRODUCT + } else if (!do_size) { + switch (ireg) { + case Op_VecX: + st->print("vori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + break; + case Op_VecY: + st->print("xvori.b %s, %s, 0\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); + break; + default: + ShouldNotReachHere(); + } +#endif + } + size += 4; + return size; +} + +static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, + int stack_offset, int reg, uint ireg, outputStream* st) { + int size = 0; + if (cbuf) { + MacroAssembler _masm(cbuf); + int offset = __ offset(); + if (is_load) { + switch (ireg) { + case Op_VecX: + __ vld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + case Op_VecY: + __ xvld(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + default: + ShouldNotReachHere(); + } + } else { // store + switch (ireg) { + case Op_VecX: + __ vst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + case Op_VecY: + __ xvst(as_FloatRegister(Matcher::_regEncode[reg]), SP, stack_offset); + break; + default: + ShouldNotReachHere(); + } + } +#ifndef PRODUCT + } else if (!do_size) { + if (is_load) { + switch (ireg) { + case Op_VecX: + st->print("vld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecY: + st->print("xvld %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + default: + ShouldNotReachHere(); + } + } else { // store + switch (ireg) { + case Op_VecX: + st->print("vst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + case Op_VecY: + st->print("xvst %s, [SP + %d]\t# spill", Matcher::regName[reg], stack_offset); + break; + default: + ShouldNotReachHere(); + } + } +#endif + } + size += 4; + return size; +} + +static int vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, + int dst_offset, uint ireg, outputStream* st) { + int size = 0; + if (cbuf) { + MacroAssembler _masm(cbuf); + switch (ireg) { + case Op_VecX: + __ vld(F23, SP, src_offset); + __ vst(F23, SP, dst_offset); + break; + case Op_VecY: + __ xvld(F23, SP, src_offset); + __ xvst(F23, SP, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#ifndef PRODUCT + } else { + switch (ireg) { + case Op_VecX: + st->print("vld f23, %d(sp)\n\t" + "vst f23, %d(sp)\t# 128-bit mem-mem spill", + src_offset, dst_offset); + break; + case Op_VecY: + st->print("xvld f23, %d(sp)\n\t" + "xvst f23, %d(sp)\t# 256-bit mem-mem spill", + src_offset, dst_offset); + break; + default: + ShouldNotReachHere(); + } +#endif + } + size += 8; + return size; +} + +uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { + // Get registers to move + OptoReg::Name src_second = ra_->get_reg_second(in(1)); + OptoReg::Name src_first = ra_->get_reg_first(in(1)); + OptoReg::Name dst_second = ra_->get_reg_second(this ); + OptoReg::Name dst_first = ra_->get_reg_first(this ); + 
+ enum RC src_second_rc = rc_class(src_second); + enum RC src_first_rc = rc_class(src_first); + enum RC dst_second_rc = rc_class(dst_second); + enum RC dst_first_rc = rc_class(dst_first); + + assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); + + // Generate spill code! + + if( src_first == dst_first && src_second == dst_second ) + return 0; // Self copy, no move + + if (bottom_type()->isa_vect() != NULL) { + uint ireg = ideal_reg(); + assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); + if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { + // mem -> mem + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_float) { + vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); + } else if (src_first_rc == rc_float && dst_first_rc == rc_stack) { + int stack_offset = ra_->reg2offset(dst_first); + vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); + } else if (src_first_rc == rc_stack && dst_first_rc == rc_float) { + int stack_offset = ra_->reg2offset(src_first); + vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); + } else { + ShouldNotReachHere(); + } + return 0; + } + + if (src_first_rc == rc_stack) { + // mem -> + if (dst_first_rc == rc_stack) { + // mem -> mem + assert(src_second != dst_first, "overlap"); + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld_d(AT, Address(SP, src_offset)); + __ st_d(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("ld_d AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" + "st_d AT, [SP + #%d]", + src_offset, dst_offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + // No pushl/popl, so: + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld_w(AT, Address(SP, src_offset)); + __ st_w(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("ld_w AT, [SP + #%d] spill 2\n\t" + "st_w AT, [SP + #%d]\n\t", + src_offset, dst_offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // mem -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld_d(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("ld_d %s, [SP + #%d]\t# spill 3", + Matcher::regName[dst_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ ld_w(as_Register(Matcher::_regEncode[dst_first]), 
Address(SP, offset)); + else { + if (Assembler::is_simm(offset, 12)) { + __ ld_wu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); + } else { + __ li(AT, offset); + __ ldx_wu(as_Register(Matcher::_regEncode[dst_first]), SP, AT); + } + } +#ifndef PRODUCT + } else { + st->print("\n\t"); + if (this->ideal_reg() == Op_RegI) + st->print("ld_w %s, [SP + #%d]\t# spill 4", + Matcher::regName[dst_first], + offset); + else + st->print("ld_wu %s, [SP + #%d]\t# spill 5", + Matcher::regName[dst_first], + offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_float) { + // mem-> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fld_d( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fld_d %s, [SP + #%d]\t# spill 6", + Matcher::regName[dst_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fld_s( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fld_s %s, [SP + #%d]\t# spill 7", + Matcher::regName[dst_first], + offset); +#endif + } + } + } + return 0; + } else if (src_first_rc == rc_int) { + // gpr -> + if (dst_first_rc == rc_stack) { + // gpr -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ st_d(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("st_d %s, [SP + #%d] # spill 8", + Matcher::regName[src_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ st_w(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("st_w %s, [SP + #%d]\t# spill 9", + Matcher::regName[src_first], offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // gpr -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ move(as_Register(Matcher::_regEncode[dst_first]), + as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("move(64bit) %s <-- %s\t# spill 10", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + return 0; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); + else + __ 
add_d(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("move(32-bit) %s <-- %s\t# spill 11", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + return 0; + } + } else if (dst_first_rc == rc_float) { + // gpr -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movgr2fr_d(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movgr2fr_d %s, %s\t# spill 12", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movgr2fr_w(as_FloatRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movgr2fr_w %s, %s\t# spill 13", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } + } else if (src_first_rc == rc_float) { + // xmm -> + if (dst_first_rc == rc_stack) { + // xmm -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fst_d( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fst_d %s, [SP + #%d]\t# spill 14", + Matcher::regName[src_first], + offset); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fst_s(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fst_s %s, [SP + #%d]\t# spill 15", + Matcher::regName[src_first], + offset); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_int) { + // xmm -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movfr2gr_d( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movfr2gr_d %s, %s\t# spill 16", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ movfr2gr_s( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("movfr2gr_s %s, %s\t# spill 17", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } else if (dst_first_rc == rc_float) { + // xmm -> xmm + if ((src_first & 1) == 0 && src_first + 
1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fmov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fmov_d %s <-- %s\t# spill 18", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ fmov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + st->print("\n\t"); + st->print("fmov_s %s <-- %s\t# spill 19", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + } + return 0; + } + } + + assert(0," foo "); + Unimplemented(); + return 0; +} + +#ifndef PRODUCT +void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + implementation( NULL, ra_, false, st ); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation( &cbuf, ra_, false, NULL ); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= +# + +#ifndef PRODUCT +void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("BRK"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { + MacroAssembler _masm(&cbuf); + __ brk(5); +} + +uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { + return MachNode::size(ra_); +} + + +//============================================================================= +#ifndef PRODUCT +void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile *C = ra_->C; + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + st->print_cr("addi_d SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); + st->print("\t"); + st->print_cr("ld_d RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); + st->print("\t"); + st->print_cr("ld_d FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); + if( do_polling() && C->is_method_compilation() ) { + st->print("\t"); + st->print_cr("Poll Safepoint # MachEpilogNode"); + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile *C = ra_->C; + MacroAssembler _masm(&cbuf); + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + __ ld_d(RA, Address(SP, framesize - wordSize)); + __ ld_d(FP, Address(SP, framesize - wordSize * 2)); + if (Assembler::is_simm(framesize, 12)) { + __ addi_d(SP, SP, framesize); + } else { + __ li(AT, framesize); + __ add_d(SP, SP, AT); + } + + if( do_polling() && C->is_method_compilation() ) { + __ li(AT, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_return_type); + __ ld_w(AT, AT, 0); + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachEpilogNode::reloc() const { + return 0; // a large enough number +} + +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +int 
MachEpilogNode::safepoint_offset() const { return 0; } + +//============================================================================= + +#ifndef PRODUCT +void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("ADDI_D %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); +} +#endif + + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + + if (Assembler::is_simm(offset, 12)) + return 4; + else + return 3 * 4; +} + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + + if (Assembler::is_simm(offset, 12)) { + __ addi_d(as_Register(reg), SP, offset); + } else { + __ lu12i_w(AT, Assembler::split_low20(offset >> 12)); + __ ori(AT, AT, Assembler::split_low12(offset)); + __ add_d(as_Register(reg), SP, AT); + } +} + +int MachCallRuntimeNode::ret_addr_offset() { + // pcaddu18i + // jirl + return NativeFarCall::instruction_size; +} + + +//============================================================================= +#ifndef PRODUCT +void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { + MacroAssembler _masm(&cbuf); + int i = 0; + for(i = 0; i < _count; i++) + __ nop(); +} + +uint MachNopNode::size(PhaseRegAlloc *) const { + return 4 * _count; +} +const Pipeline* MachNopNode::pipeline() const { + return MachNode::pipeline_class(); +} + +//============================================================================= + +//============================================================================= +#ifndef PRODUCT +void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + st->print_cr("load_klass(T4, T0)"); + st->print_cr("\tbeq(T4, iCache, L)"); + st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); + st->print_cr(" L:"); +} +#endif + + +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int ic_reg = Matcher::inline_cache_reg_encode(); + Label L; + Register receiver = T0; + Register iCache = as_Register(ic_reg); + + __ load_klass(T4, receiver); + __ beq(T4, iCache, L); + __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); + __ bind(L); +} + +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + + + +//============================================================================= + +const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); + +int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + Compile* C = ra_->C; + Compile::ConstantTable& constant_table = C->constant_table(); + MacroAssembler _masm(&cbuf); + + Register Rtoc = as_Register(ra_->get_encode(this)); + CodeSection* consts_section = cbuf.consts(); + int consts_size = consts_section->align_at_start(consts_section->size()); + 
assert(constant_table.size() == consts_size, "must be equal"); + + if (consts_section->size()) { + assert((CodeBuffer::SECT_CONSTS + 1) == CodeBuffer::SECT_INSTS, + "insts must be immediately follow consts"); + // Materialize the constant table base. + address baseaddr = cbuf.insts()->start() - consts_size + -(constant_table.table_base_offset()); + jint offs = (baseaddr - __ pc()) >> 2; + guarantee(Assembler::is_simm(offs, 20), "Not signed 20-bit offset"); + __ pcaddi(Rtoc, offs); + } +} + +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { + // pcaddi + return 1 * BytesPerInstWord; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + Register r = as_Register(ra_->get_encode(this)); + st->print("pcaddi %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); +} +#endif + + +//============================================================================= +#ifndef PRODUCT +void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile* C = ra_->C; + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(bangsize)) { + st->print_cr("# stack bang"); st->print("\t"); + } + st->print("st_d RA, %d(SP) @ MachPrologNode\n\t", -wordSize); + st->print("st_d FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); + st->print("addi_d FP, SP, -%d \n\t", wordSize*2); + st->print("addi_d SP, SP, -%d \t",framesize); +} +#endif + + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + +#ifdef ASSERT + address start = __ pc(); +#endif + + if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + + if (Assembler::is_simm(-framesize, 12)) { + __ addi_d(SP, SP, -framesize); + } else { + __ li(AT, -framesize); + __ add_d(SP, SP, AT); + } + __ st_d(RA, Address(SP, framesize - wordSize)); + __ st_d(FP, Address(SP, framesize - wordSize * 2)); + if (Assembler::is_simm(framesize - wordSize * 2, 12)) { + __ addi_d(FP, SP, framesize - wordSize * 2); + } else { + __ li(AT, framesize - wordSize * 2); + __ add_d(FP, SP, AT); + } + + assert((__ pc() - start) >= 1 * BytesPerInstWord, "No enough room for patch_verified_entry"); + + C->set_frame_complete(cbuf.insts_size()); + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. 
+ Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + + +uint MachPrologNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachPrologNode::reloc() const { + return 0; // a large enough number +} + +%} + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to output +// byte streams. Encoding classes generate functions which are called by +// Machine Instruction Nodes in order to generate the bit encoding of the +// instruction. Operands specify their base encoding interface with the +// interface keyword. There are currently supported four interfaces, +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an +// operand to generate a function which returns its register number when +// queried. CONST_INTER causes an operand to generate a function which +// returns the value of the constant when queried. MEMORY_INTER causes an +// operand to generate four functions which return the Base Register, the +// Index Register, the Scale Value, and the Offset Value of the operand when +// queried. COND_INTER causes an operand to generate six functions which +// return the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional instruction. +// Instructions specify two basic values for encoding. They use the +// ins_encode keyword to specify their encoding class (which must be one of +// the class names specified in the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular instruction +// needs for encoding need to be specified. +encode %{ + + enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf + MacroAssembler _masm(&cbuf); + // This is the instruction starting address for relocation info. + __ block_comment("Java_To_Runtime"); + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call((address)$meth$$method); + %} + + enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine + // who we intended to call. + MacroAssembler _masm(&cbuf); + address addr = (address)$meth$$method; + address call; + __ block_comment("Java_Static_Call"); + + if ( !_method ) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 
+ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); + } else if(_optimized_virtual) { + call = __ trampoline_call(AddressLiteral(addr, relocInfo::opt_virtual_call_type), &cbuf); + } else { + call = __ trampoline_call(AddressLiteral(addr, relocInfo::static_call_type), &cbuf); + } + + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + + if( _method ) { // Emit stub for static call + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + + // + // [Ref: LIR_Assembler::ic_call() ] + // + enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL + MacroAssembler _masm(&cbuf); + __ block_comment("Java_Dynamic_Call"); + address call = __ ic_call((address)$meth$$method); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + %} + + + enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ + Register result = $result$$Register; + Register sub = $sub$$Register; + Register super = $super$$Register; + Register length = $tmp$$Register; + Register tmp = T4; + Label miss; + + // result may be the same as sub + // 47c B40: # B21 B41 <- B20 Freq: 0.155379 + // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 + // 4bc mov S2, NULL #@loadConP + // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 + // + MacroAssembler _masm(&cbuf); + Label done; + __ check_klass_subtype_slow_path(sub, super, length, tmp, + NULL, &miss, + /*set_cond_codes:*/ true); + // Refer to X86_64's RDI + __ move(result, 0); + __ b(done); + + __ bind(miss); + __ li(result, 1); + __ bind(done); + %} + +%} + + +//---------LOONGARCH FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. +// +// S T A C K L A Y O U T Allocators stack-slot number +// | (to get allocators register number +// G Owned by | | v add SharedInfo::stack0) +// r CALLER | | +// o | +--------+ pad to even-align allocators stack-slot +// w V | pad0 | numbers; owned by CALLER +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | old | | 3 +// | | SP-+--------+----> Matcher::_old_SP, even aligned +// v | | ret | 3 return address +// Owned by +--------+ +// Self | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> SharedInfo::stack0, even aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by new | | +// Callee SP-+--------+----> Matcher::_new_SP, even aligned +// | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be nessecary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. 
Holes in the outgoing arguments may be nessecary for +// varargs C calling conventions. +// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. +// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack +// alignment. Region 11, pad1, may be dynamically extended so that +// SP meets the minimum alignment. + + +frame %{ + + stack_direction(TOWARDS_LOW); + + // These two registers define part of the calling convention + // between compiled code and the interpreter. + // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention + // for more information. + + inline_cache_reg(T1); // Inline Cache Register + interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter + + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset32); + + // Number of stack slots consumed by locking an object + // generate Compile::sync_stack_slots + sync_stack_slots(2); + + frame_pointer(SP); + + // Interpreter stores its frame pointer in a register which is + // stored to the stack by I2CAdaptors. + // I2CAdaptors convert from interpreted java to compiled java. + + interpreter_frame_pointer(FP); + + // generate Matcher::stack_alignment + stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); + + // Number of stack slots between incoming argument block and the start of + // a new frame. The PROLOG must add this many slots to the stack. The + // EPILOG must remove this many slots. + in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(0); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. + // Otherwise, it is above the locks and verification slot and alignment word + //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); + return_addr(REG RA); + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) + // StartNode::calling_convention call this. + calling_convention %{ + SharedRuntime::java_calling_convention(sig_bt, regs, length, false); + %} + + + + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. 
a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // SEE CallRuntimeNode::calling_convention for more information. + c_calling_convention %{ + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); + %} + + + // Location of C & interpreter return values + // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. + // SEE Matcher::match. + c_return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + + // Location of return values + // register(s) contain(s) return value for Op_StartC2I and Op_Start. + // SEE Matcher::match. + + return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(0); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(100); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_pc_relative(0); // Required PC Relative flag +ins_attrib ins_short_branch(0); // Required flag: is this instruction a + // non-matching short branch variant of some + // long branch? +ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) + // specifies the alignment that some part of the instruction (not + // necessarily the start) requires. If > 1, a compute_padding() + // function must be provided for the instruction + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +// Vectors + +operand vecX() %{ + constraint(ALLOC_IN_RC(vectorx_reg)); + match(VecX); + + format %{ %} + interface(REG_INTER); +%} + +operand vecY() %{ + constraint(ALLOC_IN_RC(vectory_reg)); + match(VecY); + + format %{ %} + interface(REG_INTER); +%} + +// Flags register, used as output of compare instructions +operand FlagsReg() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegFlags); + + format %{ "T0" %} + interface(REG_INTER); +%} + +//----------Simple Operands---------------------------------------------------- +// TODO: Should we need to define some more special immediate number ? 
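Each immediate operand defined below is a ConI/ConL match guarded by a range predicate, so the matcher only selects an immediate form when the constant fits the instruction's field (for example 0..31 for 5-bit shift counts, -2048..2047 for 12-bit displacements). The following standalone sketch shows the corresponding range checks; the helpers are local stand-ins, not HotSpot's Assembler methods.

  #include <cstdint>
  #include <cstdio>

  // Local stand-ins mirroring the ranges used by the operand predicates below.
  static bool is_uimm(int64_t v, unsigned bits) {
    return v >= 0 && v < (int64_t(1) << bits);
  }
  static bool is_simm(int64_t v, unsigned bits) {
    return v >= -(int64_t(1) << (bits - 1)) && v < (int64_t(1) << (bits - 1));
  }

  int main() {
    printf("31 as uimm5:    %d\n", is_uimm(31, 5));    // immIU5-style operand: fits
    printf("2047 as simm12: %d\n", is_simm(2047, 12)); // immI12-style operand: fits
    printf("2048 as simm12: %d\n", is_simm(2048, 12)); // out of range
    return 0;
  }

The mask operands follow the same pattern, presumably additionally requiring the constant to be of the form (1 << k) - 1, which is what the is_int_mask/is_jlong_mask predicates appear to test.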
+// Immediate Operands +// Integer Immediate +operand immI() %{ + match(ConI); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU1() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 1)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU2() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 3)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU3() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 7)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU4() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 15)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU5() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 31)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU6() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 63)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immIU8() %{ + predicate((0 <= n->get_int()) && (n->get_int() <= 255)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI10() %{ + predicate((-512 <= n->get_int()) && (n->get_int() <= 511)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI12() %{ + predicate((-2048 <= n->get_int()) && (n->get_int() <= 2047)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M65536() %{ + predicate(n->get_int() == -65536); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for decrement +operand immI_M1() %{ + predicate(n->get_int() == -1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for zero +operand immI_0() %{ + predicate(n->get_int() == 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_1() %{ + predicate(n->get_int() == 1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_2() %{ + predicate(n->get_int() == 2); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_16() %{ + predicate(n->get_int() == 16); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() %{ + predicate(n->get_int() == 24); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +// Constant for long shifts +operand immI_32() %{ + predicate(n->get_int() == 32); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for byte-wide masking +operand immI_255() %{ + predicate(n->get_int() == 255); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_65535() %{ + predicate(n->get_int() == 65535); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_MaxI() %{ + predicate(n->get_int() == 2147483647); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M2047_2048() %{ + predicate((-2047 <= n->get_int()) && (n->get_int() <= 2048)); + match(ConI); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Valid scale values for addressing modes +operand immI_0_3() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_31() %{ + predicate(n->get_int() >= 0 && 
n->get_int() <= 31); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_4095() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 4095); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_1_4() %{ + predicate(1 <= n->get_int() && (n->get_int() <= 4)); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_32_63() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M128_255() %{ + predicate((-128 <= n->get_int()) && (n->get_int() <= 255)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive integer mask +operand immI_nonneg_mask() %{ + predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate +operand immL() %{ + match(ConL); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +operand immLU5() %{ + predicate((0 <= n->get_long()) && (n->get_long() <= 31)); + match(ConL); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immL10() %{ + predicate((-512 <= n->get_long()) && (n->get_long() <= 511)); + match(ConL); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immL12() %{ + predicate((-2048 <= n->get_long()) && (n->get_long() <= 2047)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 32-bit signed +operand immL32() +%{ + predicate(n->get_long() == (int)n->get_long()); + match(ConL); + + op_cost(15); + format %{ %} + interface(CONST_INTER); +%} + +// bit 3..6 zero +operand immL_M121() %{ + predicate(n->get_long() == -121L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..2 zero +operand immL_M8() %{ + predicate(n->get_long() == -8L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 1..2 zero +operand immL_M7() %{ + predicate(n->get_long() == -7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 2 zero +operand immL_M5() %{ + predicate(n->get_long() == -5L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..1 zero +operand immL_M4() %{ + predicate(n->get_long() == -4L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate zero +operand immL_0() %{ + predicate(n->get_long() == 0L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_7() %{ + predicate(n->get_long() == 7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_MaxUI() %{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(20); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_M2047_2048() %{ + predicate((-2047 <= n->get_long()) && (n->get_long() <= 2048)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_0_4095() %{ + predicate(n->get_long() >= 0 && n->get_long() <= 4095); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive long mask +operand immL_nonneg_mask() %{ + predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand 
immP() %{ + match(ConP); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immP_0() %{ + predicate(n->get_ptr() == 0); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immP_no_oop_cheap() %{ + predicate(!n->bottom_type()->isa_oop_ptr()); + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer for polling page +operand immP_poll() %{ + predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); + match(ConP); + op_cost(5); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immN() %{ + match(ConN); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immN_0() %{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() %{ + match(ConNKlass); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Single-precision floating-point immediate +operand immF() %{ + match(ConF); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Single-precision floating-point zero +operand immF_0() %{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point immediate +operand immD() %{ + match(ConD); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point zero +operand immD_0() %{ + predicate(jlong_cast(n->getd()) == 0); + match(ConD); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Register Operands +// Integer Register +operand mRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegI); + + format %{ %} + interface(REG_INTER); +%} + +operand no_Ax_mRegI() %{ + constraint(ALLOC_IN_RC(no_Ax_int_reg)); + match(RegI); + match(mRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand mS0RegI() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegI); + match(mRegI); + + format %{ "S0" %} + interface(REG_INTER); +%} + +operand mS1RegI() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegI); + match(mRegI); + + format %{ "S1" %} + interface(REG_INTER); +%} + +operand mS3RegI() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegI); + match(mRegI); + + format %{ "S3" %} + interface(REG_INTER); +%} + +operand mS4RegI() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegI); + match(mRegI); + + format %{ "S4" %} + interface(REG_INTER); +%} + +operand mS5RegI() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegI); + match(mRegI); + + format %{ "S5" %} + interface(REG_INTER); +%} + +operand mS6RegI() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegI); + match(mRegI); + + format %{ "S6" %} + interface(REG_INTER); +%} + +operand mS7RegI() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegI); + match(mRegI); + + format %{ "S7" %} + interface(REG_INTER); +%} + + +operand mT0RegI() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegI); + match(mRegI); + + format %{ "T0" %} + interface(REG_INTER); +%} + +operand mT1RegI() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegI); + match(mRegI); + + format %{ "T1" %} + interface(REG_INTER); +%} + +operand mT2RegI() %{ + constraint(ALLOC_IN_RC(t2_reg)); + match(RegI); + match(mRegI); + + format %{ "T2" %} + interface(REG_INTER); +%} + +operand mT3RegI() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegI); + match(mRegI); + 
+ format %{ "T3" %} + interface(REG_INTER); +%} + +operand mT8RegI() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegI); + match(mRegI); + + format %{ "T8" %} + interface(REG_INTER); +%} + +operand mT4RegI() %{ + constraint(ALLOC_IN_RC(t4_reg)); + match(RegI); + match(mRegI); + + format %{ "T4" %} + interface(REG_INTER); +%} + +operand mA0RegI() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegI); + match(mRegI); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand mA1RegI() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegI); + match(mRegI); + + format %{ "A1" %} + interface(REG_INTER); +%} + +operand mA2RegI() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegI); + match(mRegI); + + format %{ "A2" %} + interface(REG_INTER); +%} + +operand mA3RegI() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegI); + match(mRegI); + + format %{ "A3" %} + interface(REG_INTER); +%} + +operand mA4RegI() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegI); + match(mRegI); + + format %{ "A4" %} + interface(REG_INTER); +%} + +operand mA5RegI() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegI); + match(mRegI); + + format %{ "A5" %} + interface(REG_INTER); +%} + +operand mA6RegI() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegI); + match(mRegI); + + format %{ "A6" %} + interface(REG_INTER); +%} + +operand mA7RegI() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegI); + match(mRegI); + + format %{ "A7" %} + interface(REG_INTER); +%} + +operand mRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegN() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegN() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegN() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegN() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegN() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegN() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegN() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegN() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a4_RegN() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a5_RegN() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegN() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegN() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s0_RegN() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegN() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s2_RegN() %{ + constraint(ALLOC_IN_RC(s2_reg)); + match(RegN); + match(mRegN); + + 
format %{ %} + interface(REG_INTER); +%} + +operand s3_RegN() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegN() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegN() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s6_RegN() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegN() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand mRegP() %{ + constraint(ALLOC_IN_RC(p_reg)); + match(RegP); + match(a0_RegP); + + format %{ %} + interface(REG_INTER); +%} + +operand no_T8_mRegP() %{ + constraint(ALLOC_IN_RC(no_T8_p_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand no_Ax_mRegP() %{ + constraint(ALLOC_IN_RC(no_Ax_p_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegP() +%{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s3_RegP() +%{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegP() +%{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegP() +%{ + constraint(ALLOC_IN_RC(s5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s6_RegP() +%{ + constraint(ALLOC_IN_RC(s6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegP() +%{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegP() +%{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegP() +%{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t2_RegP() +%{ + constraint(ALLOC_IN_RC(t2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegP() +%{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegP() +%{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegP() +%{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegP() +%{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegP() +%{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegP() +%{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} 
+ interface(REG_INTER); +%} + +operand a4_RegP() +%{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + + +operand a5_RegP() +%{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegP() +%{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegP() +%{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegP() +%{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v1_RegP() +%{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand mRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); + + format %{ %} + interface(REG_INTER); +%} + +operand mRegI2L(mRegI reg) %{ + match(ConvI2L reg); + + format %{ %} + interface(REG_INTER); +%} + +operand v0RegL() %{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand v1RegL() %{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a0RegL() %{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegL); + match(mRegL); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand a1RegL() %{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a2RegL() %{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a3RegL() %{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t0RegL() %{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t1RegL() %{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t3RegL() %{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t8RegL() %{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a4RegL() %{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a5RegL() %{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a6RegL() %{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a7RegL() %{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s0RegL() %{ + constraint(ALLOC_IN_RC(s0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s1RegL() %{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s3RegL() %{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegL); + match(mRegL); + + 
format %{ %} + interface(REG_INTER); +%} + +operand s4RegL() %{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s7RegL() %{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +// Floating register operands +operand regF() %{ + constraint(ALLOC_IN_RC(flt_reg)); + match(RegF); + + format %{ %} + interface(REG_INTER); +%} + +//Double Precision Floating register operands +operand regD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(RegD); + + format %{ %} + interface(REG_INTER); +%} + +//----------Memory Operands---------------------------------------------------- +// Indirect Memory Operand +operand indirect(mRegP reg) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(reg); + + format %{ "[$reg] @ indirect" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset12(mRegP reg, immL12 off) +%{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg off); + + op_cost(10); + format %{ "[$reg + $off (12-bit)] @ indOffset12" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +operand indOffset12I2L(mRegP reg, immI12 off) +%{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg (ConvI2L off)); + + op_cost(10); + format %{ "[$reg + $off (12-bit)] @ indOffset12I2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register +operand indIndex(mRegP addr, mRegL index) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP addr index); + + op_cost(20); + format %{"[$addr + $index] @ indIndex" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale(0x0); + disp(0x0); + %} +%} + +operand indIndexI2L(mRegP reg, mRegI ireg) +%{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP reg (ConvI2L ireg)); + op_cost(10); + format %{ "[$reg + $ireg] @ indIndexI2L" %} + interface(MEMORY_INTER) %{ + base($reg); + index($ireg); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Operand +operand indirectNarrow(mRegN reg) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(DecodeN reg); + + format %{ "[$reg] @ indirectNarrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset12Narrow(mRegN reg, immL12 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeN reg) off); + + format %{ "[$reg + $off (12-bit)] @ indOffset12Narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. 
+// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. + +// Comparision Code +operand cmpOp() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + + +// Comparision Code +// Comparison Code, unsigned compare. Used by FP also, with +// C2 (unordered) turned into GT or LT already. The other bits +// C0 and C3 are turned into Carry & Zero flags. +operand cmpOpU() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + + +//----------Special Memory Operands-------------------------------------------- +// Stack Slot Operand - This operand is used for loading and storing temporary +// values on the stack where a match requires a value to +// flow through memory. +operand stackSlotP(sRegP reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotI(sRegI reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotF(sRegF reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotD(sRegD reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotL(sRegL reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + + +//------------------------OPERAND CLASSES-------------------------------------- +opclass memory( indirect, indOffset12, indOffset12I2L, indIndex, indIndexI2L, + indirectNarrow, indOffset12Narrow); +opclass memory_loadRange(indOffset12, indirect); + +opclass mRegLorI2L(mRegI2L, mRegL); +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architectures pipeline. 
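The COND_INTER codes declared by cmpOp and cmpOpU above (equal = 0x01 through no_overflow = 0x8) are what the branch patterns further down read back as $cop$$cmpcode / $cmp$$cmpcode and dispatch on when choosing between beq_long, bne_long, blt_long and bge_long. As a minimal, self-contained C++ sketch of that mapping only (the helper name cond_name is hypothetical and not part of this port):

#include <cstdio>

// Mirrors the switch statements in the ins_encode blocks of the long-offset
// branch instructions later in this file; unsigned compares read codes
// 0x03..0x06 as above/above_equal/below/below_equal instead.
static const char* cond_name(int cmpcode) {
  switch (cmpcode) {
    case 0x01: return "equal";
    case 0x02: return "not_equal";
    case 0x03: return "greater";
    case 0x04: return "greater_equal";
    case 0x05: return "less";
    case 0x06: return "less_equal";
    case 0x07: return "overflow";
    case 0x08: return "no_overflow";
    default:   return "unimplemented";
  }
}

int main() {
  for (int code = 0x01; code <= 0x08; ++code) {
    std::printf("0x%02x -> %s\n", code, cond_name(code));
  }
  return 0;
}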
+ +pipeline %{ + + //----------ATTRIBUTES--------------------------------------------------------- + attributes %{ + fixed_size_instructions; // Fixed size instructions + max_instructions_per_bundle = 1; // 1 instruction per bundle + max_bundles_per_cycle = 4; // Up to 4 bundles per cycle + bundle_unit_size=4; + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 16; // The processor fetches one line + instruction_fetch_units = 1; // of 16 bytes + + // List of nop instructions + nops( MachNop ); + %} + + //----------RESOURCES---------------------------------------------------------- + // Resources are the functional units available to the machine + + resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); + + //----------PIPELINE DESCRIPTION----------------------------------------------- + // Pipeline Description specifies the stages in the machine's pipeline + + // IF: fetch + // ID: decode + // RD: read + // CA: caculate + // WB: write back + // CM: commit + + pipe_desc(IF, ID, RD, CA, WB, CM); + + + //----------PIPELINE CLASSES--------------------------------------------------- + // Pipeline Classes describe the stages in which input and output are + // referenced by the hardware pipeline. + + //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ + single_instruction; + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+1; + DECODE : ID; + ALU : CA; + %} + + //No.19 Integer mult operation : dst <-- reg1 mult reg2 + pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+5; + DECODE : ID; + ALU2 : CA; + %} + + pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer div operation : dst <-- reg1 div reg2 + pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer mod operation : dst <-- reg1 mod reg2 + pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ + instruction_count(2); + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //no.16 load Long from memory : + pipe_class ialu_loadL(mRegL dst, memory mem) %{ + instruction_count(2); + mem : RD(read); + dst : WB(write)+5; + DECODE : ID; + MEM : RD; + %} + + //No.17 Store Long to Memory : + pipe_class ialu_storeL(mRegL src, memory mem) %{ + instruction_count(2); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ + single_instruction; + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.3 Integer move operation : dst <-- reg + pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : 
ID; + ALU : CA; + %} + + //No.4 No instructions : do nothing + pipe_class empty( ) %{ + instruction_count(0); + %} + + //No.5 UnConditional branch : + pipe_class pipe_jump( label labl ) %{ + multiple_bundles; + DECODE : ID; + BR : RD; + %} + + //No.6 ALU Conditional branch : + pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + + //no.7 load integer from memory : + pipe_class ialu_loadI(mRegI dst, memory mem) %{ + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.8 Store Integer to Memory : + pipe_class ialu_storeI(mRegI src, memory mem) %{ + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + + //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 + pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + //No.22 Floating div operation : dst <-- reg1 div reg2 + pipe_class fpu_div(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + pipe_class fcvt_I2D(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class fcvt_D2I(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class pipe_mfc1(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD; + %} + + pipe_class pipe_mtc1(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD(5); + %} + + //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 + pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + //No.11 Load Floating from Memory : + pipe_class fpu_loadF(regF dst, memory mem) %{ + instruction_count(1); + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.12 Store Floating to Memory : + pipe_class fpu_storeF(regF src, memory mem) %{ + instruction_count(1); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.13 FPU Conditional branch : + pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + +//No.14 Floating FPU reg operation : dst <-- op reg + pipe_class fpu1_regF(regF dst, regF src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + pipe_class long_memory_op() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(30); + %} + + pipe_class simple_call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + BR : RD; + %} + + pipe_class call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + %} + + //FIXME: + //No.9 Piple slow : for multi-instructions + pipe_class pipe_slow( ) %{ + instruction_count(20); + force_serialization; + multiple_bundles; + fixed_latency(50); + %} + +%} + + + +//----------INSTRUCTIONS------------------------------------------------------- +// +// match -- States which machine-independent subtree may be replaced +// by this instruction. 
+// ins_cost -- The estimated cost of this instruction is used by instruction +// selection to identify a minimum cost tree of machine +// instructions that matches a tree of machine-independent +// instructions. +// format -- A string providing the disassembly for this instruction. +// The value of an instruction's operand may be inserted +// by referring to it with a '$' prefix. +// opcode -- Three instruction opcodes may be provided. These are referred +// to within an encode class as $primary, $secondary, and $tertiary +// respectively. The primary opcode is commonly used to +// indicate the type of machine instruction, while secondary +// and tertiary are often used for prefix options or addressing +// modes. +// ins_encode -- A list of encode classes with parameters. The encode class +// name must have been defined in an 'enc_class' specification +// in the encode section of the architecture description. + + +// Load Integer +instruct loadI(mRegI dst, memory mem) %{ + match(Set dst (LoadI mem)); + + ins_cost(125); + format %{ "ld_w $dst, $mem #@loadI" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadI_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + + ins_cost(125); + format %{ "ld_w $dst, $mem #@loadI_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Integer (32 bit signed) to Byte (8 bit signed) +instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "ld_b $dst, $mem\t# int -> byte #@loadI2B" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) +instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "ld_bu $dst, $mem\t# int -> ubyte #@loadI2UB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Short (16 bit signed) +instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); + + ins_cost(125); + format %{ "ld_h $dst, $mem\t# int -> short #@loadI2S" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) +instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "ld_hu $dst, $mem\t# int -> ushort/char #@loadI2US" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe(ialu_loadI); +%} + +// Load Long. 
+instruct loadL(mRegL dst, memory mem) %{ +// predicate(!((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + + ins_cost(250); + format %{ "ld_d $dst, $mem #@loadL" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadL ); +%} + +// Load Long - UNaligned +instruct loadL_unaligned(mRegL dst, memory mem) %{ + match(Set dst (LoadL_unaligned mem)); + + // FIXME: Need more effective ldl/ldr + ins_cost(450); + format %{ "ld_d $dst, $mem #@loadL_unaligned\n\t" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadL ); +%} + +// Store Long +instruct storeL_reg(memory mem, mRegL src) %{ + match(Set mem (StoreL mem src)); + + ins_cost(200); + format %{ "st_d $mem, $src #@storeL_reg\n" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeL ); +%} + +instruct storeL_immL_0(memory mem, immL_0 zero) %{ + match(Set mem (StoreL mem zero)); + + ins_cost(180); + format %{ "st_d zero, $mem #@storeL_immL_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeL ); +%} + +// Load Compressed Pointer +instruct loadN(mRegN dst, memory mem) +%{ + match(Set dst (LoadN mem)); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# compressed ptr @ loadN" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2P(mRegP dst, memory mem) +%{ + match(Set dst (DecodeN (LoadN mem))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# @ loadN2P" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Pointer +instruct loadP(mRegP dst, memory mem) %{ + match(Set dst (LoadP mem)); + + ins_cost(125); + format %{ "ld_d $dst, $mem #@loadP" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Klass Pointer +instruct loadKlass(mRegP dst, memory mem) %{ + match(Set dst (LoadKlass mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); + %} + ins_pipe( ialu_loadI ); +%} + +// Load narrow Klass Pointer +instruct loadNKlass(mRegN dst, memory mem) +%{ + match(Set dst (LoadNKlass mem)); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2PKlass(mRegP dst, memory mem) +%{ + match(Set dst (DecodeNKlass (LoadNKlass mem))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "ld_wu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Constant +instruct loadConI(mRegI dst, immI src) %{ + match(Set dst src); + + ins_cost(120); + format %{ "mov $dst, $src #@loadConI" %} + ins_encode %{ + Register dst = $dst$$Register; + int value = $src$$constant; + __ li(dst, value); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct loadConL(mRegL dst, immL src) %{ + match(Set dst src); + ins_cost(120); + format %{ "li $dst, $src @ loadConL" %} + ins_encode %{ + __ li($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_regL_regL); +%} + +// Load Range +instruct loadRange(mRegI dst, memory_loadRange mem) %{ + match(Set dst (LoadRange mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadRange" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct storeP(memory mem, mRegP src ) %{ + match(Set mem (StoreP mem src)); + + ins_cost(125); + format %{ "st_d $src, $mem #@storeP" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +// Store NULL Pointer, mark word, or other simple pointer constant. 
+instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ + match(Set mem (StoreP mem zero)); + + ins_cost(125); + format %{ "mov $mem, $zero #@storeImmP_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Compressed Pointer +instruct storeN(memory mem, mRegN src) +%{ + match(Set mem (StoreN mem src)); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# compressed ptr @ storeN" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP2N(memory mem, mRegP src) +%{ + match(Set mem (StoreN mem (EncodeP src))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# @ storeP2N" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeNKlass(memory mem, mRegN src) +%{ + match(Set mem (StoreNKlass mem src)); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# compressed klass ptr @ storeNKlass" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeP2NKlass(memory mem, mRegP src) +%{ + match(Set mem (StoreNKlass mem (EncodePKlass src))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "st_w $mem, $src\t# @ storeP2NKlass" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeImmN_immN_0(memory mem, immN_0 zero) +%{ + match(Set mem (StoreN mem zero)); + + ins_cost(125); // XXX + format %{ "storeN0 zero, $mem\t# compressed ptr" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Byte +instruct storeB_immB_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreB mem zero)); + + format %{ "mov $mem, zero #@storeB_immB_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeB(memory mem, mRegI src) %{ + match(Set mem (StoreB mem src)); + + ins_cost(125); + format %{ "st_b $src, $mem #@storeB" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeB_convL2I(memory mem, mRegL src) %{ + match(Set mem (StoreB mem (ConvL2I src))); + + ins_cost(125); + format %{ "st_b $src, $mem #@storeB_convL2I" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Byte (8bit signed) +instruct loadB(mRegI dst, memory mem) %{ + match(Set dst (LoadB mem)); + + ins_cost(125); + format %{ "ld_b $dst, $mem #@loadB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadB_convI2L(mRegL dst, memory mem) %{ + match(Set dst 
(ConvI2L (LoadB mem))); + + ins_cost(125); + format %{ "ld_b $dst, $mem #@loadB_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Byte (8bit UNsigned) +instruct loadUB(mRegI dst, memory mem) %{ + match(Set dst (LoadUB mem)); + + ins_cost(125); + format %{ "ld_bu $dst, $mem #@loadUB" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadUB_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(125); + format %{ "ld_bu $dst, $mem #@loadUB_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Short (16bit signed) +instruct loadS(mRegI dst, memory mem) %{ + match(Set dst (LoadS mem)); + + ins_cost(125); + format %{ "ld_h $dst, $mem #@loadS" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Short (16 bit signed) to Byte (8 bit signed) +instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "ld_b $dst, $mem\t# short -> byte #@loadS2B" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); + %} + ins_pipe(ialu_loadI); +%} + +instruct loadS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(125); + format %{ "ld_h $dst, $mem #@loadS_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Store Integer Immediate +instruct storeI_immI_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreI mem zero)); + + format %{ "mov $mem, zero #@storeI_immI_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Store Integer +instruct storeI(memory mem, mRegI src) %{ + match(Set mem (StoreI mem src)); + + ins_cost(125); + format %{ "st_w $mem, $src #@storeI" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeI_convL2I(memory mem, mRegL src) %{ + match(Set mem (StoreI mem (ConvL2I src))); + + ins_cost(125); + format %{ "st_w $mem, $src #@storeI_convL2I" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Float +instruct loadF(regF dst, memory mem) %{ + match(Set dst (LoadF mem)); + + ins_cost(150); + format %{ "loadF $dst, $mem #@loadF" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadConP_general(mRegP dst, immP src) %{ + match(Set dst src); + + ins_cost(120); + format %{ "li $dst, $src #@loadConP_general" %} + + ins_encode %{ + Register dst = $dst$$Register; + long* value = (long*)$src$$constant; + + 
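+    // Constants that refer to metadata (Klass*) or oops are emitted as a
+    // patchable 52-bit immediate (patchable_li52) together with the matching
+    // relocation record, so the embedded pointer can be updated later; plain
+    // constants with no relocation are loaded directly with li.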
if($src->constant_reloc() == relocInfo::metadata_type){ + int klass_index = __ oop_recorder()->find_index((Klass*)value); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + + __ relocate(rspec); + __ patchable_li52(dst, (long)value); + } else if($src->constant_reloc() == relocInfo::oop_type){ + int oop_index = __ oop_recorder()->find_index((jobject)value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + __ relocate(rspec); + __ patchable_li52(dst, (long)value); + } else if ($src->constant_reloc() == relocInfo::none) { + __ li(dst, (long)value); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ + match(Set dst src); + + ins_cost(80); + format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} + + ins_encode %{ + if ($src->constant_reloc() == relocInfo::metadata_type) { + __ mov_metadata($dst$$Register, (Metadata*)$src$$constant); + } else { + __ li($dst$$Register, $src$$constant); + } + %} + + ins_pipe(ialu_regI_regI); +%} + + +instruct loadConP_poll(mRegP dst, immP_poll src) %{ + match(Set dst src); + + ins_cost(50); + format %{ "li $dst, $src #@loadConP_poll" %} + + ins_encode %{ + Register dst = $dst$$Register; + intptr_t value = (intptr_t)$src$$constant; + + __ li(dst, (jlong)value); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_immP_0(mRegP dst, immP_0 src) +%{ + match(Set dst src); + + ins_cost(50); + format %{ "mov $dst, R0\t# ptr" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + __ add_d(dst_reg, R0, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ + match(Set dst src); + format %{ "move $dst, R0\t# compressed NULL ptr" %} + ins_encode %{ + __ move($dst$$Register, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN(mRegN dst, immN src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_oop(dst, (jobject)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +instruct loadConNKlass(mRegN dst, immNKlass src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_klass(dst, (Klass*)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +//FIXME +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. +instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ + match(TailCall jump_target method_oop ); + ins_cost(300); + format %{ "JMP $jump_target \t# @TailCalljmpInd" %} + + ins_encode %{ + Register target = $jump_target$$Register; + Register oop = $method_oop$$Register; + + // RA will be used in generate_forward_exception() + __ push(RA); + + __ move(S3, oop); + __ jr(target); + %} + + ins_pipe( pipe_jump ); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is setup +// just prior to jumping to this handler. No code emitted. 
+instruct CreateException( a0_RegP ex_oop ) +%{ + match(Set ex_oop (CreateEx)); + + // use the following format syntax + format %{ "# exception oop is in A0; no code emitted @CreateException" %} + ins_encode %{ + // X86 leaves this function empty + __ block_comment("CreateException is empty in LA"); + %} + ins_pipe( empty ); +// ins_pipe( pipe_jump ); +%} + + +/* The mechanism of exception handling is clear now. + +- Common try/catch: + [stubGenerator_loongarch.cpp] generate_forward_exception() + |- V0, V1 are created + |- T4 <= SharedRuntime::exception_handler_for_return_address + `- jr T4 + `- the caller's exception_handler + `- jr OptoRuntime::exception_blob + `- here +- Rethrow(e.g. 'unwind'): + * The callee: + |- an exception is triggered during execution + `- exits the callee method through RethrowException node + |- The callee pushes exception_oop(T0) and exception_pc(RA) + `- The callee jumps to OptoRuntime::rethrow_stub() + * In OptoRuntime::rethrow_stub: + |- The VM calls _rethrow_Java to determine the return address in the caller method + `- exits the stub with tailjmpInd + |- pops exception_oop(V0) and exception_pc(V1) + `- jumps to the return address(usually an exception_handler) + * The caller: + `- continues processing the exception_blob with V0/V1 +*/ + +// Rethrow exception: +// The exception oop will come in the first argument position. +// Then JUMP (not call) to the rethrow stub code. +instruct RethrowException() +%{ + match(Rethrow); + + // use the following format syntax + format %{ "JMP rethrow_stub #@RethrowException" %} + ins_encode %{ + __ block_comment("@ RethrowException"); + + cbuf.set_insts_mark(); + cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); + + // call OptoRuntime::rethrow_stub to get the exception handler in parent method + __ patchable_jump((address)OptoRuntime::rethrow_stub()); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Branch Instructions --- long offset versions + +// Jump Direct +instruct jmpDir_long(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_long" %} + + ins_encode %{ + Label* L = $labl$$label; + __ jmp_far(*L); + %} + + ins_pipe( pipe_jump ); + //ins_pc_relative(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ blt_long(op2, op1, *L, true /* signed */); + break; + case 0x04: //above_equal + __ bge_long(op1, op2, *L, true /* signed */); + break; + case 0x05: //below + __ blt_long(op1, op2, *L, true /* signed */); + break; + case 0x06: //below_equal + __ bge_long(op2, op1, *L, true /* signed */); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ 
jmpLoopEnd_reg_immI_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = AT; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + __ li(op2, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ blt_long(op2, op1, *L, true /* signed */); + break; + case 0x04: //above_equal + __ bge_long(op1, op2, *L, true /* signed */); + break; + case 0x05: //below + __ blt_long(op1, op2, *L, true /* signed */); + break; + case 0x06: //below_equal + __ bge_long(op2, op1, *L, true /* signed */); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} + + ins_encode %{ + Label* L = $labl$$label; + switch($cop$$cmpcode) { + case 0x01: //equal + __ bne_long($cr$$Register, R0, *L); + break; + case 0x02: //not equal + __ beq_long($cr$$Register, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +// Conditional jumps +instruct branchConP_0_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_0_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConN2P_0_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) + { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ blt_long(op2, op1, *L, false /* unsigned */); + break; + case 0x04: //above_equal + __ bge_long(op1, op2, *L, false /* unsigned */); + break; + case 0x05: //below + __ blt_long(op1, op2, *L, false /* unsigned */); + break; + case 0x06: //below_equal + __ bge_long(op2, op1, *L, false /* unsigned */); + break; + 
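+      // Any other condition code is unexpected for a pointer compare and
+      // falls through to Unimplemented().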
default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_long" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_long" %} + ins_encode %{ + Register op1_reg = $op1$$Register; + Register op2_reg = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1_reg, op2_reg, *L); + break; + case 0x02: //not_equal + __ bne_long(op1_reg, op2_reg, *L); + break; + case 0x03: //above + __ blt_long(op2_reg, op1_reg, *L, false /* unsigned */); + break; + case 0x04: //above_equal + __ bge_long(op1_reg, op2_reg, *L, false /* unsigned */); + break; + case 0x05: //below + __ blt_long(op1_reg, op2_reg, *L, false /* unsigned */); + break; + case 0x06: //below_equal + __ bge_long(op2_reg, op1_reg, *L, false /* unsigned */); + break; + default: + Unimplemented(); + } + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ blt_long(op2, op1, *L, false /* unsigned */); + break; + case 0x04: //above_equal + __ bge_long(op1, op2, *L, false /* unsigned */); + break; + case 0x05: //below + __ blt_long(op1, op2, *L, false /* unsigned */); + break; + case 0x06: //below_equal + __ bge_long(op2, op1, *L, false /* unsigned */); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ li(AT, val); + switch(flag) { + case 0x01: //equal + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, AT, *L); + break; + case 0x03: //above + __ blt_long(AT, op1, *L, false /* unsigned */); + break; + case 0x04: //above_equal + __ bge_long(op1, AT, *L, false /* unsigned */); + break; + case 0x05: //below + __ blt_long(op1, AT, *L, false /* unsigned */); + break; + case 0x06: //below_equal + __ bge_long(AT, op1, *L, false /* unsigned */); + break; + default: + 
Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ blt_long(op2, op1, *L, true /* signed */); + break; + case 0x04: //above_equal + __ bge_long(op1, op2, *L, true /* signed */); + break; + case 0x05: //below + __ blt_long(op1, op2, *L, true /* signed */); + break; + case 0x06: //below_equal + __ bge_long(op2, op1, *L, true /* signed */); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, R0, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, R0, *L); + break; + case 0x03: //greater + __ blt_long(R0, op1, *L, true /* signed */); + break; + case 0x04: //greater_equal + __ bge_long(op1, R0, *L, true /* signed */); + break; + case 0x05: //less + __ blt_long(op1, R0, *L, true /* signed */); + break; + case 0x06: //less_equal + __ bge_long(R0, op1, *L, true /* signed */); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ li(AT, val); + switch(flag) { + case 0x01: //equal + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, AT, *L); + break; + case 0x03: //greater + __ blt_long(AT, op1, *L, true /* signed */); + break; + case 0x04: //greater_equal + __ bge_long(op1, AT, *L, true /* signed */); + break; + case 0x05: //less + __ blt_long(op1, AT, *L, true /* signed */); + break; + case 0x06: //less_equal + __ bge_long(AT, op1, *L, true /* signed */); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, R0, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, R0, *L); + break; + case 0x03: //above + __ bne_long(R0, op1, *L); + break; + case 0x04: //above_equal + __ beq_long(R0, R0, *L); + break; + case 0x05: //below + return; + break; + case 0x06: //below_equal + __ beq_long(op1, R0, *L); + break; + default: + 
Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_regL_long(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = R0; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpUL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} + 
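+  // CmpUL compares the operands as unsigned 64-bit values, so the encodings
+  // of the branchConUL_* patterns pass false for the signedness flag of
+  // blt_long/bge_long, even though the cmpOp names (greater, less, ...)
+  // read like signed tests.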
ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = R0; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ li(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, true /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, true /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ li(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ blt_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + case 0x04: //greater_equal + __ bge_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x05: //less + __ blt_long(opr1_reg, opr2_reg, *target, false /* signed */); + break; + + case 0x06: //less_equal + __ bge_long(opr2_reg, opr1_reg, *target, false /* signed */); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +//FIXME +instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: //not_equal 
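+      // As in the double variant below: compare with an ordered-equal test and
+      // branch when it is false. An unordered-equal compare would treat NaN
+      // operands as equal and break Java's 'f != f' NaN check (Float.isNaN).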
+ __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: //greater + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: //greater_equal + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: //less + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: //less_equal + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + +instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: //not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: //greater + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: //greater_equal + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: //less + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: //less_equal + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + + +// ============================================================================ +// Branch Instructions -- short offset versions + +// Jump Direct +instruct jmpDir_short(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + if(&L) + __ b(L); + else + __ b(int(0)); + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + if (&L) + __ blt(op2, op1, L); + else + __ blt(op2, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bge(op1, op2, L); + else + __ bge(op1, op2, (int)0); + break; + case 0x05: //below + if (&L) + __ blt(op1, op2, L); + else + __ blt(op1, op2, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bge(op2, op1, L); + else + __ bge(op2, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + 
format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = AT; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + __ li(op2, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + if (&L) + __ blt(op2, op1, L); + else + __ blt(op2, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bge(op1, op2, L); + else + __ bge(op1, op2, (int)0); + break; + case 0x05: //below + if (&L) + __ blt(op1, op2, L); + else + __ blt(op1, op2, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bge(op2, op1, L); + else + __ bge(op2, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + switch($cop$$cmpcode) { + case 0x01: //equal + if (&L) + __ bnez($cr$$Register, L); + else + __ bnez($cr$$Register, (int)0); + break; + case 0x02: //not equal + if (&L) + __ beqz($cr$$Register, L); + else + __ beqz($cr$$Register, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Conditional jumps +instruct branchConP_0_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_0_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConN2P_0_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_0_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) + { + case 0x01: //equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label &L = *($labl$$label); + int flag = 
$cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + if (&L) + __ bltu(op2, op1, L); + else + __ bltu(op2, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bgeu(op1, op2, L); + else + __ bgeu(op1, op2, (int)0); + break; + case 0x05: //below + if (&L) + __ bltu(op1, op2, L); + else + __ bltu(op1, op2, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bgeu(op2, op1, L); + else + __ bgeu(op2, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_short" %} + ins_encode %{ + Register op1 = $op1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + default: + Unimplemented(); + } + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_short" %} + ins_encode %{ + Register op1_reg = $op1$$Register; + Register op2_reg = $op2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1_reg, op2_reg, L); + else + __ beq(op1_reg, op2_reg, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1_reg, op2_reg, L); + else + __ bne(op1_reg, op2_reg, (int)0); + break; + case 0x03: //above + if (&L) + __ bltu(op2_reg, op1_reg, L); + else + __ bltu(op2_reg, op1_reg, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bgeu(op1_reg, op2_reg, L); + else + __ bgeu(op1_reg, op2_reg, (int)0); + break; + case 0x05: //below + if (&L) + __ bltu(op1_reg, op2_reg, L); + else + __ bltu(op1_reg, op2_reg, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bgeu(op2_reg, op1_reg, L); + else + __ bgeu(op2_reg, op1_reg, (int)0); + break; + default: + Unimplemented(); + } + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + if (&L) + __ bltu(op2, op1, L); + else + __ bltu(op2, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bgeu(op1, op2, L); + else + __ bgeu(op1, op2, (int)0); + break; + case 0x05: //below + if (&L) + 
__ bltu(op1, op2, L); + else + __ bltu(op1, op2, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bgeu(op2, op1, L); + else + __ bgeu(op2, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ li(AT, val); + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //above + if (&L) + __ bltu(AT, op1, L); + else + __ bltu(AT, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bgeu(op1, AT, L); + else + __ bgeu(op1, AT, (int)0); + break; + case 0x05: //below + if (&L) + __ bltu(op1, AT, L); + else + __ bltu(op1, AT, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bgeu(AT, op1, L); + else + __ bgeu(AT, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + if (&L) + __ blt(op2, op1, L); + else + __ blt(op2, op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ bge(op1, op2, L); + else + __ bge(op1, op2, (int)0); + break; + case 0x05: //below + if (&L) + __ blt(op1, op2, L); + else + __ blt(op1, op2, (int)0); + break; + case 0x06: //below_equal + if (&L) + __ bge(op2, op1, L); + else + __ bge(op2, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + case 0x03: //greater + if (&L) + __ blt(R0, op1, L); + else + __ blt(R0, op1, (int)0); + break; + case 0x04: //greater_equal + if (&L) + __ bge(op1, R0, L); + else + __ bge(op1, R0, (int)0); + break; + case 0x05: //less + if (&L) + __ blt(op1, R0, L); + else + __ blt(op1, R0, (int)0); + break; + case 0x06: //less_equal + if (&L) + __ bge(R0, op1, L); + else + __ bge(R0, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); 
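+  // ins_short_branch(1) marks this as the short-offset form: C2's
+  // branch-shortening pass substitutes it for the matching *_long variant
+  // whenever the label ends up close enough for a single conditional branch.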
+ ins_short_branch(1); +%} + + +instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ li(AT, val); + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //greater + if (&L) + __ blt(AT, op1, L); + else + __ blt(AT, op1, (int)0); + break; + case 0x04: //greater_equal + if (&L) + __ bge(op1, AT, L); + else + __ bge(op1, AT, (int)0); + break; + case 0x05: //less + if (&L) + __ blt(op1, AT, L); + else + __ blt(op1, AT, (int)0); + break; + case 0x06: //less_equal + if (&L) + __ bge(AT, op1, L); + else + __ bge(AT, op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + case 0x03: //above + if (&L) + __ bnez(op1, L); + else + __ bnez(op1, (int)0); + break; + case 0x04: //above_equal + if (&L) + __ b(L); + else + __ b((int)0); + break; + case 0x05: //below + return; + break; + case 0x06: //below_equal + if (&L) + __ beqz(op1, L); + else + __ beqz(op1, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + case 0x03: //greater + if (&target) + __ blt(opr2_reg, opr1_reg, target); + else + __ blt(opr2_reg, opr1_reg, (int)0); + break; + case 0x04: //greater_equal + if (&target) + __ bge(opr1_reg, opr2_reg, target); + else + __ bge(opr1_reg, opr2_reg, (int)0); + break; + case 0x05: //less + if (&target) + __ blt(opr1_reg, opr2_reg, target); + else + __ blt(opr1_reg, opr2_reg, (int)0); + break; + case 0x06: //less_equal + if (&target) + __ bge(opr2_reg, opr1_reg, target); + else + __ bge(opr2_reg, opr1_reg, (int)0); + break; + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_regL_short(cmpOp cmp, mRegLorI2L src1, mRegLorI2L src2, label 
labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + case 0x03: //greater + if (&target) + __ bltu(opr2_reg, opr1_reg, target); + else + __ bltu(opr2_reg, opr1_reg, (int)0); + break; + case 0x04: //greater_equal + if (&target) + __ bgeu(opr1_reg, opr2_reg, target); + else + __ bgeu(opr1_reg, opr2_reg, (int)0); + break; + case 0x05: //less + if (&target) + __ bltu(opr1_reg, opr2_reg, target); + else + __ bltu(opr1_reg, opr2_reg, (int)0); + break; + case 0x06: //less_equal + if (&target) + __ bgeu(opr2_reg, opr1_reg, target); + else + __ bgeu(opr2_reg, opr1_reg, (int)0); + break; + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beqz(opr1_reg, target); + else + __ beqz(opr1_reg, int(0)); + break; + + case 0x02: //not_equal + if (&target) + __ bnez(opr1_reg, target); + else + __ bnez(opr1_reg, (int)0); + break; + + case 0x03: //greater + if (&target) + __ blt(R0, opr1_reg, target); + else + __ blt(R0, opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if (&target) + __ bge(opr1_reg, R0, target); + else + __ bge(opr1_reg, R0, (int)0); + break; + + case 0x05: //less + if (&target) + __ blt(opr1_reg, R0, target); + else + __ blt(opr1_reg, R0, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ bge(R0, opr1_reg, target); + else + __ bge(R0, opr1_reg, int(0)); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpUL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beqz(opr1_reg, target); + else + __ beqz(opr1_reg, int(0)); + break; + + case 0x02: //not_equal + if (&target) + __ bnez(opr1_reg, target); + else + __ bnez(opr1_reg, (int)0); + break; + + case 0x03: //greater + if (&target) + __ bltu(R0, opr1_reg, target); + else + __ bltu(R0, opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if (&target) + __ bgeu(opr1_reg, R0, target); + else + __ bgeu(opr1_reg, R0, (int)0); + break; + + case 0x05: //less + if (&target) + __ bltu(opr1_reg, R0, target); + else + __ bltu(opr1_reg, R0, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ bgeu(R0, opr1_reg, target); + 
else + __ bgeu(R0, opr1_reg, int(0)); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ li(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + + case 0x03: //greater + if (&target) + __ blt(opr2_reg, opr1_reg, target); + else + __ blt(opr2_reg, opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if (&target) + __ bge(opr1_reg, opr2_reg, target); + else + __ bge(opr1_reg, opr2_reg, (int)0); + break; + + case 0x05: //less + if (&target) + __ blt(opr1_reg, opr2_reg, target); + else + __ blt(opr1_reg, opr2_reg, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ bge(opr2_reg, opr1_reg, target); + else + __ bge(opr2_reg, opr1_reg, (int)0); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ li(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + + case 0x03: //greater + if (&target) + __ bltu(opr2_reg, opr1_reg, target); + else + __ bltu(opr2_reg, opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if (&target) + __ bgeu(opr1_reg, opr2_reg, target); + else + __ bgeu(opr1_reg, opr2_reg, (int)0); + break; + + case 0x05: //less + if (&target) + __ bltu(opr1_reg, opr2_reg, target); + else + __ bltu(opr1_reg, opr2_reg, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ bgeu(opr2_reg, opr1_reg, target); + else + __ bgeu(opr2_reg, opr1_reg, (int)0); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +//FIXME +instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + if (&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x02: //not_equal + __ fcmp_ceq_s(FCC0, reg_op1, reg_op2); + if (&L) + __ 
bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x03: //greater + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x04: //greater_equal + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x05: //less + __ fcmp_cult_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x06: //less_equal + __ fcmp_cule_s(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + if (&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x02: //not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ fcmp_ceq_d(FCC0, reg_op1, reg_op2); + if (&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x03: //greater + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x04: //greater_equal + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bceqz(FCC0, L); + else + __ bceqz(FCC0, (int)0); + break; + case 0x05: //less + __ fcmp_cult_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + case 0x06: //less_equal + __ fcmp_cule_d(FCC0, reg_op1, reg_op2); + if(&L) + __ bcnez(FCC0, L); + else + __ bcnez(FCC0, (int)0); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +// =================== End of branch instructions ========================== + +// Call Runtime Instruction +instruct CallRuntimeDirect(method meth) %{ + match(CallRuntime ); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,runtime #@CallRuntimeDirect" %} + ins_encode( Java_To_Runtime( meth ) ); + ins_pipe( pipe_slow ); + ins_alignment(4); +%} + + + +//------------------------MemBar Instructions------------------------------- +//Memory barrier flavors + +instruct membar_acquire() %{ + match(MemBarAcquire); + ins_cost(400); + + format %{ "MEMBAR-acquire @ membar_acquire" %} + ins_encode %{ + __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); + %} + ins_pipe(empty); +%} + +instruct load_fence() %{ + match(LoadFence); + ins_cost(400); + + format %{ "MEMBAR @ load_fence" %} + ins_encode %{ + __ membar(Assembler::Membar_mask_bits(__ LoadLoad|__ LoadStore)); + %} + ins_pipe(pipe_slow); +%} + +instruct membar_acquire_lock() +%{ + match(MemBarAcquireLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} + ins_encode(); + ins_pipe(empty); +%} + +instruct membar_release() %{ + match(MemBarRelease); + ins_cost(400); + + format %{ "MEMBAR-release @ membar_release" %} + + ins_encode %{ + // Attention: DO NOT 
DELETE THIS GUY! + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); + %} + + ins_pipe(pipe_slow); +%} + +instruct store_fence() %{ + match(StoreFence); + ins_cost(400); + + format %{ "MEMBAR @ store_fence" %} + + ins_encode %{ + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); + %} + + ins_pipe(pipe_slow); +%} + +instruct membar_release_lock() +%{ + match(MemBarReleaseLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} + ins_encode(); + ins_pipe(empty); +%} + + +instruct membar_volatile() %{ + match(MemBarVolatile); + ins_cost(400); + + format %{ "MEMBAR-volatile" %} + ins_encode %{ + if( !os::is_MP() ) return; // Not needed on single CPU + __ membar(__ StoreLoad); + + %} + ins_pipe(pipe_slow); +%} + +instruct unnecessary_membar_volatile() %{ + match(MemBarVolatile); + predicate(Matcher::post_store_load_barrier(n)); + ins_cost(0); + + size(0); + format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} + ins_encode( ); + ins_pipe(empty); +%} + +instruct membar_storestore() %{ + match(MemBarStoreStore); + + ins_cost(400); + format %{ "MEMBAR-storestore @ membar_storestore" %} + ins_encode %{ + __ membar(__ StoreStore); + %} + ins_pipe(empty); +%} + +//----------Move Instructions-------------------------------------------------- +instruct castX2P(mRegP dst, mRegL src) %{ + match(Set dst (CastX2P src)); + format %{ "castX2P $dst, $src @ castX2P" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_cost(10); + ins_pipe( ialu_regI_mov ); +%} + +instruct castP2X(mRegL dst, mRegP src ) %{ + match(Set dst (CastP2X src)); + + format %{ "mov $dst, $src\t #@castP2X" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_pipe( ialu_regI_mov ); +%} + +instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ movfr2gr_s(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ + match(Set dst (MoveI2F src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ movgr2fr_w(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ movfr2gr_d(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + Register src = as_Register($src$$reg); + + __ movgr2fr_d(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +//----------Conditional Move--------------------------------------------------- +// Conditional move +instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, 
mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = 
$src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI 
(Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegLorI2L tmp1, mRegLorI2L tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = 
$tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + Label L; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t 
@cmovL_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, 
false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ + match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); + FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ + match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); + FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop, regF tmp3, regF tmp4) %{ + match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = 
$tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister tmp1 = as_FloatRegister($tmp3$$reg); + FloatRegister tmp2 = as_FloatRegister($tmp4$$reg); + int flag = $cop$$cmpcode; + + // Use signed comparison here, because the most significant bit of the + // user-space virtual address must be 0. + __ cmp_cmov(op1, op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag); + %} + + ins_pipe( pipe_slow ); +%} + +//FIXME +instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop, regD tmp3, regD tmp4) %{ + match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + effect(TEMP tmp3, TEMP tmp4); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister tmp1 = $tmp3$$FloatRegister; + FloatRegister tmp2 = $tmp4$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, tmp1, tmp2, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +// Manifest a CmpL result in an integer register. Very painful. +// This is the test to avoid. 
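+// The rule below produces the result branch-free with two set-on-less-than
+// operations and a subtract, roughly  dst = (src1 > src2) - (src1 < src2),
+// which yields -1, 0 or 1.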
+instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{
+  match(Set dst (CmpL3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %}
+  ins_encode %{
+    Register opr1 = as_Register($src1$$reg);
+    Register opr2 = as_Register($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    __ slt(AT, opr1, opr2);
+    __ slt(dst, opr2, opr1);
+    __ sub_d(dst, dst, AT);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+//
+// less_result = -1
+// greater_result = 1
+// equal_result = 0
+// nan_result = -1
+//
+instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{
+  match(Set dst (CmpF3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = as_FloatRegister($src1$$reg);
+    FloatRegister src2 = as_FloatRegister($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    __ fcmp_clt_s(FCC0, src2, src1);
+    __ fcmp_cult_s(FCC1, src1, src2);
+    __ movcf2gr(dst, FCC0);
+    __ movcf2gr(AT, FCC1);
+    __ sub_d(dst, dst, AT);
+
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{
+  match(Set dst (CmpD3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = as_FloatRegister($src1$$reg);
+    FloatRegister src2 = as_FloatRegister($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    __ fcmp_clt_d(FCC0, src2, src1);
+    __ fcmp_cult_d(FCC1, src1, src2);
+    __ movcf2gr(dst, FCC0);
+    __ movcf2gr(AT, FCC1);
+    __ sub_d(dst, dst, AT);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{
+  match(Set dummy (ClearArray cnt base));
+  format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
+  ins_encode %{
+    //Assume cnt is the number of doublewords in an array to be cleared,
+    //and base points to the starting address of the array.
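+    // The loop below clears one doubleword per iteration: store zero,
+    // advance the address by wordSize, and decrement the count until it
+    // reaches zero.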
+ Register base = $base$$Register; + Register num = $cnt$$Register; + Label Loop, done; + + __ add_d(AT, base, R0); + __ beq(num, R0, done); + + __ move(T4, num); /* T4 = words */ + + __ bind(Loop); + __ st_d(R0, AT, 0); + __ addi_d(T4, T4, -1); + __ addi_d(AT, AT, wordSize); + __ bne(T4, R0, Loop); + + __ bind(done); + %} + ins_pipe( pipe_slow ); +%} + +instruct string_compare(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compare" %} + ins_encode %{ + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + Register str1 = $str1$$Register; + Register str2 = $str2$$Register; + Register cnt1 = $cnt1$$Register; + Register cnt2 = $cnt2$$Register; + Register result = $result$$Register; + + Label L, Loop, haveResult, done; + + // compute the and difference of lengths (in result) + __ sub_d(result, cnt1, cnt2); // result holds the difference of two lengths + + // compute the shorter length (in cnt1) + __ bge(cnt2, cnt1, Loop); + __ move(cnt1, cnt2); + + // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register + __ bind(Loop); // Loop begin + __ beq(cnt1, R0, done); + __ ld_hu(AT, str1, 0); + // compare current character + __ ld_hu(cnt2, str2, 0); + __ addi_d(str1, str1, 2); + __ bne(AT, cnt2, haveResult); + __ addi_d(str2, str2, 2); + __ addi_d(cnt1, cnt1, -1); // Loop end + __ b(Loop); + + __ bind(haveResult); + __ sub_d(result, AT, cnt2); + + __ bind(done); + %} + + ins_pipe( pipe_slow ); +%} + +// intrinsic optimization +instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); + + format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} + ins_encode %{ + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + Register str1 = $str1$$Register; + Register str2 = $str2$$Register; + Register cnt = $cnt$$Register; + Register tmp = $temp$$Register; + Register result = $result$$Register; + + Label Loop, True, False; + + __ addi_d(result, R0, 1); + __ beq(str1, str2, True); // same char[] ? 
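+    // result is pre-set to 1, so identical array references and a zero
+    // length branch straight to the True exit as equal.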
+ + __ beq(cnt, R0, True); + + __ bind(Loop); + + // compare current character + __ ld_hu(AT, str1, 0); + __ ld_hu(tmp, str2, 0); + __ addi_d(str1, str1, 2); + __ bne(AT, tmp, False); + __ addi_d(cnt, cnt, -1); + __ addi_d(str2, str2, 2); + __ bne(cnt, R0, Loop); + + __ b(True); + + __ bind(False); + __ addi_d(result, R0, 0); + + __ bind(True); + %} + + ins_pipe( pipe_slow ); +%} + +//----------Arithmetic Instructions------------------------------------------- +//----------Addition Instructions--------------------------------------------- +instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ add_w(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addI_Reg_imm(mRegI dst, mRegI src1, immI12 src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "add $dst, $src1, $src2 #@addI_Reg_imm12" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int imm = $src2$$constant; + + __ addi_w(dst, src1, imm); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addI_salI_Reg_Reg_immI_1_4(mRegI dst, mRegI src1, mRegI src2, immI_1_4 shift) %{ + match(Set dst (AddI src1 (LShiftI src2 shift))); + + format %{ "alsl $dst, $src1, $src2, $shift #@addI_salI_Reg_Reg_immI_1_4" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + int sh = $shift$$constant; + __ alsl_w(dst, src2, src1, sh - 1); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ add_d(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_reg_M8(mRegP dst, mRegP src1, mRegLorI2L src2, immL_M8 M8) %{ + match(Set dst (AddP src1 (AndL src2 M8))); + format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg_M8" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ bstrins_d(src2, R0, 2, 0); + __ add_d(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} + ins_encode %{ + Register src1 = $src1$$Register; + long src2 = $src2$$constant; + Register dst = $dst$$Register; + + __ addi_d(dst, src1, src2); + %} + ins_pipe( ialu_regI_imm16 ); +%} + +instruct addP_salL_Reg_RegI2L_immI_1_4(mRegP dst, mRegP src1, mRegI src2, immI_1_4 shift) %{ + match(Set dst (AddP src1 (LShiftL (ConvI2L src2) shift))); + + format %{ "alsl $dst, $src1, $src2, $shift #@addP_salL_Reg_RegI2L_immI_1_4" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + int sh = $shift$$constant; + __ alsl_d(dst, src2, src1, sh - 1); + %} + + ins_pipe(ialu_regI_regI); +%} + +// Add Long Register with Register +instruct addL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (AddL src1 src2)); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = 
as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ add_d(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_Reg_imm(mRegL dst, mRegLorI2L src1, immL12 src2) +%{ + match(Set dst (AddL src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + int src2_imm = $src2$$constant; + + __ addi_d(dst_reg, src1_reg, src2_imm); + %} + + ins_pipe( ialu_regL_regL ); +%} + +//----------Subtraction Instructions------------------------------------------- +// Integer Subtraction Instructions +instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(100); + + format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ sub_w(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subI_Reg_immI_M2047_2048(mRegI dst, mRegI src1, immI_M2047_2048 src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(80); + + format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M2047_2048" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ addi_w(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ + match(Set dst (SubI zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negI_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ sub_w(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negL_Reg(mRegL dst, immL_0 zero, mRegLorI2L src) %{ + match(Set dst (SubL zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negL_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ sub_d(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subL_Reg_immL_M2047_2048(mRegL dst, mRegL src1, immL_M2047_2048 src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(80); + + format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M2047_2048" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ addi_d(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Subtract Long Register with Register. 
+instruct subL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(100); + format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src1 = as_Register($src1$$reg); + Register src2 = as_Register($src2$$reg); + + __ sub_d(dst, src1, src2); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Integer MOD with Register +instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (ModI src1 src2)); + ins_cost(300); + format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ mod_w(dst, src1, src2); + %} + + //ins_pipe( ialu_mod ); + ins_pipe( ialu_regI_regI ); +%} + +instruct modL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (ModL src1 src2)); + format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ mod_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (MulI src1 src2)); + + ins_cost(300); + format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ mul_w(dst, src1, src2); + %} + ins_pipe( ialu_mult ); +%} + +instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (DivI src1 src2)); + + ins_cost(300); + format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ div_w(dst, src1, src2); + + %} + ins_pipe( ialu_mod ); +%} + +instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ + match(Set dst (DivF src1 src2)); + + ins_cost(300); + format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fdiv_s(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ + match(Set dst (DivD src1 src2)); + + ins_cost(300); + format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fdiv_d(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulL_reg_reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (MulL src1 src2)); + format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ mul_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulHiL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (MulHiL src1 src2)); + format %{ "mulHiL $dst, $src1, $src2 @mulL_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ mulh_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (DivL src1 src2)); + format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} + + 
ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + __ div_d(dst, op1, op2); + %} + ins_pipe( pipe_slow ); +%} + +instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (AddF src1 src2)); + format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fadd_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (SubF src1 src2)); + format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsub_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} +instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (AddD src1 src2)); + format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fadd_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (SubD src1 src2)); + format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsub_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negF_reg(regF dst, regF src) %{ + match(Set dst (NegF src)); + format %{ "negF $dst, $src @negF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fneg_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negD_reg(regD dst, regD src) %{ + match(Set dst (NegD src)); + format %{ "negD $dst, $src @negD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fneg_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (MulF src1 src2)); + format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fmul_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ + match(Set dst (AddF (MulF src1 src2) src3)); + // For compatibility reason (e.g. on the Loongson platform), disable this guy. 
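+  // (The very large ins_cost below effectively disables this rule, so the
+  // matcher prefers the cheaper separate mulF/addF rules instead.)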
+ ins_cost(44444); + format %{ "maddF $dst, $src1, $src2, $src3 @maddF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister src3 = $src3$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fmadd_s(dst, src1, src2, src3); + %} + ins_pipe( fpu_regF_regF ); +%} + +// Mul two double precision floating piont number +instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (MulD src1 src2)); + format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fmul_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ + match(Set dst (AddD (MulD src1 src2) src3)); + // For compatibility reason (e.g. on the Loongson platform), disable this guy. + ins_cost(44444); + format %{ "maddD $dst, $src1, $src2, $src3 @maddD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister src3 = $src3$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + __ fmadd_d(dst, src1, src2, src3); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct absF_reg(regF dst, regF src) %{ + match(Set dst (AbsF src)); + ins_cost(100); + format %{ "absF $dst, $src @absF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fabs_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +// intrinsics for math_native. +// AbsD SqrtD CosD SinD TanD LogD Log10D + +instruct absD_reg(regD dst, regD src) %{ + match(Set dst (AbsD src)); + ins_cost(100); + format %{ "absD $dst, $src @absD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fabs_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtD_reg(regD dst, regD src) %{ + match(Set dst (SqrtD src)); + ins_cost(100); + format %{ "SqrtD $dst, $src @sqrtD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsqrt_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtF_reg(regF dst, regF src) %{ + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + ins_cost(100); + format %{ "SqrtF $dst, $src @sqrtF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ fsqrt_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} +//----------------------------------Logical Instructions---------------------- +//__________________________________Integer Logical Instructions------------- + +//And Instuctions +// And Register with Immediate +instruct andI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ + match(Set dst (AndI src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ andi(dst, src, val); + + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ + match(Set dst (AndI src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = 
$src1$$Register; + int size = Assembler::is_int_mask($mask$$constant); + + __ bstrpick_w(dst, src, size-1, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ + match(Set dst (AndL src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int size = Assembler::is_jlong_mask($mask$$constant); + + __ bstrpick_d(dst, src, size-1, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_imm_0_4095(mRegI dst, mRegI src1, immI_0_4095 src2) %{ + match(Set dst (XorI src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ + match(Set dst (XorI src1 M1)); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ orn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ + match(Set dst (XorI (ConvL2I src1) M1)); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ orn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ + match(Set dst (XorL src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI mask (LoadB mem))); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_lmask" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadB mem) mask)); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_rmask" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ andr(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI src1 (XorI src2 M1))); + + format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ andn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI src1 (XorI src2 M1))); + + format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} + 
ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ orn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI (XorI src1 M1) src2)); + + format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ andn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI (XorI src1 M1) src2)); + + format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ orn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +// And Long Register with Register +instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegLorI2L src2) %{ + match(Set dst (AndL src1 src2)); + format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ andr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct andL_Reg_imm_0_4095(mRegL dst, mRegL src1, immL_0_4095 src2) %{ + match(Set dst (AndL src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL2I_Reg_imm_0_4095(mRegI dst, mRegL src1, immL_0_4095 src2) %{ + match(Set dst (ConvL2I (AndL src1 src2))); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_4095" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ + match(Set dst (AndL dst M8)); + ins_cost(60); + + format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 2, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ + match(Set dst (AndL dst M5)); + ins_cost(60); + + format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 2, 2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ + match(Set dst (AndL dst M7)); + ins_cost(60); + + format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 2, 1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ + match(Set dst (AndL dst M4)); + ins_cost(60); + + format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 1, 0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ + match(Set dst (AndL dst M121)); + ins_cost(60); + + format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ bstrins_d(dst, R0, 6, 3); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Or Long 
Register with Register +instruct orL_Reg_Reg(mRegL dst, mRegLorI2L src1, mRegLorI2L src2) %{ + match(Set dst (OrL src1 src2)); + format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; + + __ orr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegLorI2L src2) %{ + match(Set dst (OrL (CastP2X src1) src2)); + format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; + + __ orr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor Long Register with Register +instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (XorL src1 src2)); + format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ xorr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left by 5-bit immediate +instruct salI_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ slli_w(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct salL2I_Reg_imm(mRegI dst, mRegL src, immIU5 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ slli_w(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ + match(Set dst (AndI (LShiftI src shift) mask)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ slli_w(dst, src, 16); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); + + format %{ "andi $dst, $src, 7\t# @land7_2_s" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ andi(dst, src, 7); + %} + ins_pipe(ialu_regI_regI); +%} + +// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. +// This idiom is used by the compiler the i2s bytecode. +instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); + + format %{ "i2s $dst, $src\t# @i2s" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ ext_w_h(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + +// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. +// This idiom is used by the compiler for the i2b bytecode. 
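+// As in the i2s rule above, the (x << 24) >> 24 sign-extension pattern is
+// collapsed into a single ext_w_b instead of a shift pair.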
+instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) +%{ + match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); + + format %{ "i2b $dst, $src\t# @i2b" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ ext_w_b(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + + +instruct salI_RegL2I_imm(mRegI dst, mRegL src, immIU5 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ slli_w(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Shift Left by 8-bit immediate +instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shamt = $shift$$Register; + __ sll_w(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + + +// Shift Left Long 6-bit immI +instruct salL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ slli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left Long +instruct salL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ sll_d(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long 6-bit +instruct sarL_Reg_imm(mRegL dst, mRegLorI2L src, immIU6 shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srai_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (RShiftL src shift))); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srai_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long arithmetically +instruct sarL_Reg_Reg(mRegL dst, mRegLorI2L src, mRegI shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ sra_d(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long logically +instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(100); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ srl_d(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct 
slrL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegLorI2L src, immI_0_31 shift, immI_MaxI max_int) %{ + match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); + ins_cost(80); + format %{ "bstrpick_d $dst, $src, $shift+30, shift @ slrL_Reg_immI_0_31_and_max_int" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ bstrpick_d(dst_reg, src_reg, shamt+30, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_convL2I(mRegI dst, mRegLorI2L src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (URShiftL src shift))); + predicate(n->in(1)->in(2)->get_int() > 32); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ srli_d(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor Instructions +// Xor Register with Register +instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (XorI src1 src2)); + + format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ xorr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Or Instructions +instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_4095 src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} + ins_encode %{ + __ ori($dst$$Register, $src1$$Register, $src2$$constant); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Or Register with Register +instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} + ins_encode %{ + Register dst = 
$dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ + match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); + predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); + + format %{ "rotri_w $dst, $src, 1 ...\n\t" + "srli_w $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int rshift = $rshift$$constant; + + __ rotri_w(dst, src, 1); + if (rshift - 1) { + __ srli_w(dst, dst, rshift - 1); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ + match(Set dst (OrI src1 (CastP2X src2))); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right by 5-bit immediate +instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ + match(Set dst (URShiftI src shift)); + //effect(KILL cr); + + format %{ "SRLI_W $dst, $src, $shift #@shr_logical_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + + __ srli_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ + match(Set dst (AndI (URShiftI src shift) mask)); + + format %{ "bstrpick_w $dst, $src, $shift+one-bits($mask)-1, shift #@shr_logical_Reg_imm_nonneg_mask" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int pos = $shift$$constant; + int size = Assembler::is_int_mask($mask$$constant); + + __ bstrpick_w(dst, src, pos+size-1, pos); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (LShiftI src lshift) (URShiftI src rshift))); + + ins_cost(100); + format %{ "rotri_w $dst, $src, $rshift #@rolI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_w(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_32_63 lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_0_31 lshift, immI_32_63 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + 
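+    // The predicate guarantees lshift + rshift == 64, so the rotate-left by
+    // $lshift is emitted as a rotate-right by $rshift.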
+ __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); + + ins_cost(100); + format %{ "rotri_w $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_w(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_0_31(mRegL dst, mRegLorI2L src, immI_0_31 rshift, immI_32_63 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_32_63(mRegL dst, mRegLorI2L src, immI_32_63 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotri_d $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotri_d(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right +instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (URShiftI src shift)); + + format %{ "SRL_W $dst, $src, $shift #@shr_logical_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ srl_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immIU5 shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRAI_W $dst, $src, $shift #@shr_arith_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + __ srai_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRA_W $dst, $src, $shift #@shr_arith_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ sra_w(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +//----------Convert Int to Boolean--------------------------------------------- + +instruct convI2B(mRegI dst, mRegI src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convI2B $dst, $src @ convI2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, src); + } else { + __ move(AT, src); + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct convI2L_reg( mRegL dst, mRegI src) %{ + match(Set dst (ConvI2L src)); + + ins_cost(100); + format %{ "SLLI_W $dst, $src @ convI2L_reg\t" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if(dst != 
src) __ slli_w(dst, src, 0); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct convL2I_reg( mRegI dst, mRegLorI2L src ) %{ + match(Set dst (ConvL2I src)); + + format %{ "MOV $dst, $src @ convL2I_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ slli_w(dst, src, 0); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct convL2D_reg( regD dst, mRegL src ) %{ + match(Set dst (ConvL2D src)); + format %{ "convL2D $dst, $src @ convL2D_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ movgr2fr_d(dst, src); + __ ffint_d_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + + +// Convert double to int. +// If the double is NaN, stuff a zero in instead. +instruct convD2I_reg_reg(mRegI dst, regD src, regD tmp) %{ + match(Set dst (ConvD2I src)); + effect(USE src, TEMP tmp); + + format %{ "convd2i $dst, $src, using $tmp as TEMP @ convD2I_reg_reg" %} + + ins_encode %{ + __ ftintrz_w_d($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convD2L_reg_reg(mRegL dst, regD src, regD tmp) %{ + match(Set dst (ConvD2L src)); + effect(USE src, TEMP tmp); + + format %{ "convd2l $dst, $src, using $tmp as TEMP @ convD2L_reg_reg" %} + + ins_encode %{ + __ ftintrz_l_d($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + +// Convert float to int. +// If the float is NaN, stuff a zero in instead. +instruct convF2I_reg_reg(mRegI dst, regF src, regF tmp) %{ + match(Set dst (ConvF2I src)); + effect(USE src, TEMP tmp); + + format %{ "convf2i $dst, $src, using $tmp as TEMP @ convF2I_reg_reg" %} + + ins_encode %{ + __ ftintrz_w_s($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_s($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convF2L_reg_reg(mRegL dst, regF src, regF tmp) %{ + match(Set dst (ConvF2L src)); + effect(USE src, TEMP tmp); + + format %{ "convf2l $dst, $src, using $tmp as TEMP @ convF2L_reg_reg" %} + + ins_encode %{ + __ ftintrz_l_s($tmp$$FloatRegister, $src$$FloatRegister); + __ movfr2gr_d($dst$$Register, $tmp$$FloatRegister); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convL2F_reg( regF dst, mRegL src ) %{ + match(Set dst (ConvL2F src)); + format %{ "convl2f $dst, $src @ convL2F_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + Register src = as_Register($src$$reg); + Label L; + + __ movgr2fr_d(dst, src); + __ ffint_s_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convI2F_reg( regF dst, mRegI src ) %{ + match(Set dst (ConvI2F src)); + format %{ "convi2f $dst, $src @ convI2F_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + + __ movgr2fr_w(dst, src); + __ ffint_s_w(dst, dst); + %} + + ins_pipe( fpu_regF_regF ); +%} + +instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ + match(Set dst (CmpLTMask p zero)); + ins_cost(100); + + format %{ "srai_w $dst, $p, 31 @ cmpLTMask_immI_0" %} + ins_encode %{ + Register src = $p$$Register; + Register dst = $dst$$Register; + + __ srai_w(dst, src, 31); + %} + ins_pipe( pipe_slow ); +%} + + +instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ + match(Set dst (CmpLTMask p q)); + ins_cost(400); + + format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} + ins_encode %{ + Register p = $p$$Register; + Register q = $q$$Register; + Register dst 
= $dst$$Register; + + __ slt(dst, p, q); + __ sub_d(dst, R0, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct convP2B(mRegI dst, mRegP src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convP2B $dst, $src @ convP2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, src); + } else { + __ move(AT, src); + __ addi_d(dst, R0, 1); + __ maskeqz(dst, dst, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + + +instruct convI2D_reg_reg(regD dst, mRegI src) %{ + match(Set dst (ConvI2D src)); + format %{ "conI2D $dst, $src @convI2D_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + __ movgr2fr_w(dst ,src); + __ ffint_d_w(dst, dst); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convF2D_reg_reg(regD dst, regF src) %{ + match(Set dst (ConvF2D src)); + format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ fcvt_d_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convD2F_reg_reg(regF dst, regD src) %{ + match(Set dst (ConvD2F src)); + format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ fcvt_s_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +// Convert oop pointer into compressed form +instruct encodeHeapOop(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop $dst,$src" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ encode_heap_oop(dst, src); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} + ins_encode %{ + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && + n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + + __ decode_heap_oop(d, s); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_heap_oop_not_null(d, s); + } else { + __ decode_heap_oop_not_null(d); + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ + match(Set dst (EncodePKlass src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} + ins_encode %{ + __ encode_klass_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ + match(Set dst (DecodeNKlass src)); + format %{ 
"decode_heap_klass_not_null $dst,$src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_klass_not_null(d, s); + } else { + __ decode_klass_not_null(d); + } + %} + ins_pipe( ialu_regL_regL ); +%} + +//FIXME +instruct tlsLoadP(mRegP dst) %{ + match(Set dst (ThreadLocal)); + + ins_cost(0); + format %{ " get_thread in $dst #@tlsLoadP" %} + ins_encode %{ + Register dst = $dst$$Register; +#ifdef OPT_THREAD + __ move(dst, TREG); +#else + __ get_thread(dst); +#endif + %} + + ins_pipe( ialu_loadI ); +%} + + +instruct checkCastPP( mRegP dst ) %{ + match(Set dst (CheckCastPP dst)); + + format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} + ins_encode( /*empty encoding*/ ); + ins_pipe( empty ); +%} + +instruct castPP(mRegP dst) +%{ + match(Set dst (CastPP dst)); + + size(0); + format %{ "# castPP of $dst" %} + ins_encode(/* empty encoding */); + ins_pipe(empty); +%} + +instruct castII( mRegI dst ) %{ + match(Set dst (CastII dst)); + format %{ "#castII of $dst empty encoding" %} + ins_encode( /*empty encoding*/ ); + ins_cost(0); + ins_pipe( empty ); +%} + +// Return Instruction +// Remove the return address & jump to it. +instruct Ret() %{ + match(Return); + format %{ "RET #@Ret" %} + + ins_encode %{ + __ jr(RA); + %} + + ins_pipe( pipe_jump ); +%} + + + +// Tail Jump; remove the return address; jump to target. +// TailCall above leaves the return address around. +// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). +// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a +// "restore" before this instruction (in Epilogue), we need to materialize it +// in %i0. +//FIXME +instruct tailjmpInd(no_Ax_mRegP jump_target, mRegP ex_oop) %{ + match( TailJump jump_target ex_oop ); + ins_cost(200); + format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} + ins_encode %{ + Register target = $jump_target$$Register; + + // V0, V1 are indicated in: + // [stubGenerator_loongarch.cpp] generate_forward_exception() + // [runtime_loongarch.cpp] OptoRuntime::generate_exception_blob() + // + Register oop = $ex_oop$$Register; + Register exception_oop = V0; + Register exception_pc = V1; + + __ move(exception_pc, RA); + __ move(exception_oop, oop); + + __ jr(target); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Procedure Call/Return Instructions +// Call Java Static Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. +instruct CallStaticJavaDirect(method meth) %{ + match(CallStaticJava); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,static #@CallStaticJavaDirect " %} + ins_encode( Java_Static_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +// Call Java Dynamic Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. 
+instruct CallDynamicJavaDirect(method meth) %{ + match(CallDynamicJava); + effect(USE meth); + + ins_cost(300); + format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" + "CallDynamic @ CallDynamicJavaDirect" %} + ins_encode( Java_Dynamic_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +instruct CallLeafNoFPDirect(method meth) %{ + match(CallLeafNoFP); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF_NOFP,runtime " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +// Prefetch instructions. + +instruct prefetchr( memory mem ) %{ + match(PrefetchRead mem); + ins_cost(125); + + format %{ "pref $mem\t# Prefetch into temporal cache for read @ prefetchr" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ add_d(AT, as_Register(base), as_Register(index)); + } else { + __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); + } + } else { + __ move(AT, as_Register(base)); + } + if( Assembler::is_simm(disp, 12) ) { + __ addi_d(AT, AT, disp); + } else { + __ li(T4, disp); + __ add_d(AT, AT, T4); + } + __ preld(0, AT, 0); //hint: 0:load + %} + ins_pipe(pipe_slow); +%} + +instruct prefetchw( memory mem ) %{ + match(PrefetchWrite mem); + ins_cost(125); + format %{ "pref $mem\t# Prefetch to temporal cache for write @ prefetchw" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ add_d(AT, as_Register(base), as_Register(index)); + } else { + __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); + } + } else { + __ move(AT, as_Register(base)); + } + if( Assembler::is_simm(disp, 12) ) { + __ addi_d(AT, AT, disp); + } else { + __ li(T4, disp); + __ add_d(AT, AT, T4); + } + __ preld(8, AT, 0); //hint: 8:store + %} + ins_pipe(pipe_slow); +%} + +// Prefetch instructions for allocation. 
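+// The memory operand is flattened into base/index/scale/disp at match time; the
+// prefetch encodings above and below rebuild the effective address by hand
+// (add_d/alsl_d for the scaled index, then addi_d when disp fits in simm12,
+// otherwise li + add_d) before issuing preld. Hint 0 requests a prefetch for
+// load and hint 8 a prefetch for store, so the allocation prefetch below uses
+// hint 8. (Illustrative note only; see the encodings for the exact sequences.)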
+ +instruct prefetchAlloc(memory mem) %{ + match(PrefetchAllocation mem); + ins_cost(125); + format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if (index != 0) { + if (scale == 0) { + __ add_d(AT, as_Register(base), as_Register(index)); + } else { + __ alsl_d(AT, as_Register(index), as_Register(base), scale - 1); + } + + if (Assembler::is_simm(disp, 12)) { + __ preld(8, AT, disp); + } else { + __ li(T4, disp); + __ add_d(AT, AT, T4); + __ preld(8, AT, 0); + } + } else { + if (Assembler::is_simm(disp, 12)) { + __ preld(8, as_Register(base), disp); + } else { + __ li(T4, disp); + __ add_d(AT, as_Register(base), T4); + __ preld(8, AT, 0); + } + } + %} + ins_pipe(pipe_slow); +%} + + +// Call runtime without safepoint +instruct CallLeafDirect(method meth) %{ + match(CallLeaf); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(4); +%} + +// Load Char (16bit unsigned) +instruct loadUS(mRegI dst, memory mem) %{ + match(Set dst (LoadUS mem)); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadC" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +instruct loadUS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} + ins_encode %{ + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + +// Store Char (16bit unsigned) +instruct storeC(memory mem, mRegI src) %{ + match(Set mem (StoreC mem src)); + + ins_cost(125); + format %{ "storeC $src, $mem @ storeC" %} + ins_encode %{ + __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); + %} + ins_pipe( ialu_loadI ); +%} + +instruct storeC_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreC mem zero)); + + ins_cost(125); + format %{ "storeC $zero, $mem @ storeC_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct loadConF_immF_0(regF dst, immF_0 zero) %{ + match(Set dst zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConF_immF_0\n"%} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + + __ movgr2fr_w(dst, R0); + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConF(regF dst, immF src) %{ + match(Set dst src); + ins_cost(125); + + format %{ "fld_s $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm(con_offset, 12)) { + __ fld_s($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ li(AT, con_offset); + __ fldx_s($dst$$FloatRegister, $constanttablebase, AT); + } + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConD_immD_0(regD dst, immD_0 zero) %{ + match(Set dst zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConD_immD_0"%} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ movgr2fr_d(dst, R0); + %} + ins_pipe( fpu_loadF ); +%} + +instruct loadConD(regD dst, immD src) %{ + match(Set dst src); + ins_cost(125); 
+ + format %{ "fld_d $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm(con_offset, 12)) { + __ fld_d($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ li(AT, con_offset); + __ fldx_d($dst$$FloatRegister, $constanttablebase, AT); + } + %} + ins_pipe( fpu_loadF ); +%} + +// Store register Float value (it is faster than store from FPU register) +instruct storeF_reg( memory mem, regF src) %{ + match(Set mem (StoreF mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeF_reg" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); + %} + ins_pipe( fpu_storeF ); +%} + +instruct storeF_immF_0( memory mem, immF_0 zero) %{ + match(Set mem (StoreF mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); + %} + ins_pipe( ialu_storeI ); +%} + +// Load Double +instruct loadD(regD dst, memory mem) %{ + match(Set dst (LoadD mem)); + + ins_cost(150); + format %{ "loadD $dst, $mem #@loadD" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); + %} + ins_pipe( ialu_loadI ); +%} + +// Load Double - UNaligned +instruct loadD_unaligned(regD dst, memory mem ) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(250); + // FIXME: Need more effective ldl/ldr + format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); + %} + ins_pipe( ialu_loadI ); +%} + +instruct storeD_reg( memory mem, regD src) %{ + match(Set mem (StoreD mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeD_reg" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); + %} + ins_pipe( fpu_storeF ); +%} + +instruct storeD_immD_0( memory mem, immD_0 zero) %{ + match(Set mem (StoreD mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); + %} + ins_pipe( ialu_storeI ); +%} + +instruct loadSSI(mRegI dst, stackSlotI src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld_w $dst, $src\t# int stk @ loadSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSI) !"); + __ ld_w($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSI(stackSlotI dst, mRegI src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "st_w $dst, $src\t# int stk @ storeSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSI) !"); + __ st_w($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSL(mRegL dst, stackSlotL src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld_d $dst, $src\t# long stk @ loadSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSL) !"); + __ ld_d($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSL(stackSlotL dst, mRegL src) +%{ + 
match(Set dst src); + + ins_cost(100); + format %{ "st_d $dst, $src\t# long stk @ storeSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSL) !"); + __ st_d($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSP(mRegP dst, stackSlotP src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld_d $dst, $src\t# ptr stk @ loadSSP" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSP) !"); + __ ld_d($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSP(stackSlotP dst, mRegP src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSP) !"); + __ st_d($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSF(regF dst, stackSlotF src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); + __ fld_s($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSF(stackSlotF dst, regF src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); + __ fst_s($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +// Use the same format since predicate() can not be used here. +instruct loadSSD(regD dst, stackSlotD src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); + __ fld_d($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSD(stackSlotD dst, regD src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSD) !"); + __ fst_d($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ + match(Set cr (FastLock object box)); + effect(TEMP tmp, TEMP scr); + ins_cost(300); + format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ + match(Set cr (FastUnlock object box)); + effect(TEMP tmp, TEMP scr); + ins_cost(300); + format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +// Store CMS card-mark Immediate 0 +instruct storeImmCM_order(memory mem, immI_0 zero) %{ + match(Set mem (StoreCM mem zero)); + predicate(UseConcMarkSweepGC && !UseCondCardMark); + ins_cost(100); + format %{ "StoreCM MEMBAR storestore\n\t" + "st_b $mem, zero\t! 
card-mark imm0" %} + ins_encode %{ + __ membar(__ StoreStore); + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +instruct storeImmCM(memory mem, immI_0 zero) %{ + match(Set mem (StoreCM mem zero)); + + ins_cost(150); + format %{ "st_b $mem, zero\t! card-mark imm0" %} + ins_encode %{ + __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); + %} + ins_pipe( ialu_storeI ); +%} + +// Die now +instruct ShouldNotReachHere( ) +%{ + match(Halt); + ins_cost(300); + + // Use the following format syntax + format %{ "ILLTRAP ;#@ShouldNotReachHere" %} + ins_encode %{ + // Here we should emit illtrap! + __ brk(18); + %} + ins_pipe( pipe_jump ); +%} + +instruct leaP12Narrow(mRegP dst, indOffset12Narrow mem) +%{ + predicate(Universe::narrow_oop_shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# ptr off12narrow @ leaP12Narrow" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + int disp = $mem$$disp; + + __ addi_d(dst, base, disp); + %} + ins_pipe( ialu_regI_imm16 ); +%} + +instruct leaPIdxScale(mRegP dst, mRegP reg, mRegLorI2L lreg, immI_0_3 scale) +%{ + match(Set dst (AddP reg (LShiftL lreg scale))); + + ins_cost(110); + format %{ "leaq $dst, [$reg + $lreg << $scale]\t# @ leaPIdxScale" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = $reg$$Register; + Register index = $lreg$$Register; + int scale = $scale$$constant; + + if (scale == 0) { + __ add_d($dst$$Register, $reg$$Register, index); + } else { + __ alsl_d(dst, index, base, scale - 1); + } + %} + + ins_pipe( ialu_regI_imm16 ); +%} + + +// ============================================================================ +// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass +// array for an instance of the superklass. Set a hidden internal cache on a +// hit (cache is checked with exposed code in gen_subtype_check()). Return +// NZ for a miss or zero for a hit. The encoding ALSO sets flags. +instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ + match(Set result (PartialSubtypeCheck sub super)); + effect(KILL tmp); + ins_cost(1100); // slightly larger than the next version + format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} + + ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); + ins_pipe( pipe_slow ); +%} + +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. + +instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + + format %{ "move AT, $newval\n\t" + "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" + "move $cr, AT\n" %} + ins_encode%{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); + + int index = $heap_top_ptr$$index; + int scale = $heap_top_ptr$$scale; + int disp = $heap_top_ptr$$disp; + + guarantee(Assembler::is_simm(disp, 12), ""); + + if (index != 0) { + __ stop("in storePConditional: index != 0"); + } else { + __ move(AT, newval); + __ sc_d(AT, addr); + __ move($cr$$Register, AT); + } + %} + ins_pipe(long_memory_op); +%} + +// Conditional-store of an int value. +// AT flag is set on success, reset otherwise. 
+instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ + match(Set cr (StoreIConditional mem (Binary oldval newval))); + format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} + + ins_encode %{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Register cr = $cr$$Register; + Address addr(as_Register($mem$$base), $mem$$disp); + + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + guarantee(Assembler::is_simm(disp, 12), ""); + + if (index != 0) { + __ stop("in storeIConditional: index != 0"); + } else { + if (cr != addr.base() && cr != oldval && cr != newval) { + __ cmpxchg32(addr, oldval, newval, cr, true, false, true); + } else { + __ cmpxchg32(addr, oldval, newval, AT, true, false, true); + __ move(cr, AT); + } + } + %} + + ins_pipe(long_memory_op); +%} + +// Conditional-store of a long value. +// AT flag is set on success, reset otherwise. Implemented with a CMPXCHG. +instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) +%{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); + + format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} + ins_encode%{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Register cr = $cr$$Register; + Address addr(as_Register($mem$$base), $mem$$disp); + + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + guarantee(Assembler::is_simm(disp, 12), ""); + + if (index != 0) { + __ stop("in storeLConditional: index != 0"); + } else { + if (cr != addr.base() && cr != oldval && cr != newval) { + __ cmpxchg(addr, oldval, newval, cr, false, true); + } else { + __ cmpxchg(addr, oldval, newval, AT, false, true); + __ move(cr, AT); + } + } + %} + ins_pipe(long_memory_op); +%} + +// Implement LoadPLocked. Must be ordered against changes of the memory location +// by storePConditional.
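+// A rough sketch of the intended pairing, as emitted by loadPLocked below and
+// storePConditional above (register names are illustrative only):
+//   ll_d   top, [heap_top]      // load-linked: read the current heap top
+//   ...                         // new_top is computed by separate nodes
+//   move   AT,  new_top
+//   sc_d   AT,  [heap_top]      // store-conditional: AT becomes 1 on success, 0 on failure
+//   move   cr,  AT
+// The sc_d succeeds only if heap_top was not written in between, which is why the
+// LoadPLocked load must stay ordered with respect to storePConditional.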
+instruct loadPLocked(mRegP dst, memory mem) %{ + match(Set dst (LoadPLocked mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "ll_d $dst, $mem #@loadPLocked\n\t" %} + size(12); + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); + %} + ins_pipe( ialu_loadI ); +%} + + +instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ + match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg32(addr, oldval, newval, res, true, false, true); + } else { + __ cmpxchg32(addr, oldval, newval, AT, true, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ + predicate(VM_Version::supports_cx8()); + match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg(addr, oldval, newval, res, false, true); + } else { + __ cmpxchg(addr, oldval, newval, AT, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg(addr, oldval, newval, res, false, true); + } else { + __ cmpxchg(addr, oldval, newval, AT, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + + if (res != addr.base() && res != oldval && res != newval) { + __ cmpxchg32(addr, oldval, newval, res, false, false, true); + } else { + __ cmpxchg32(addr, oldval, newval, AT, false, false, true); + __ move(res, AT); + } + %} + ins_pipe(long_memory_op); +%} + +//----------Max and Min-------------------------------------------------------- + +// Min Register with Register (generic version) +instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MinI dst src)); + //effect(KILL flags); + ins_cost(80); + + format %{ "MIN $dst, $src @minI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, src, dst); + __ masknez(dst, dst, AT); + __ maskeqz(AT, src, AT); + __ OR(dst, 
dst, AT); + %} + + ins_pipe( pipe_slow ); +%} + +// Max Register with Register (generic version) +instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MaxI dst src)); + ins_cost(80); + + format %{ "MAX $dst, $src @maxI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, dst, src); + __ masknez(dst, dst, AT); + __ maskeqz(AT, src, AT); + __ OR(dst, dst, AT); + %} + + ins_pipe( pipe_slow ); +%} + +instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ + match(Set dst (MaxI dst zero)); + ins_cost(50); + + format %{ "MAX $dst, 0 @maxI_Reg_zero" %} + + ins_encode %{ + Register dst = $dst$$Register; + + __ slt(AT, dst, R0); + __ masknez(dst, dst, AT); + %} + + ins_pipe( pipe_slow ); +%} + +instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL src mask)); + + format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ bstrpick_d(dst, src, 31, 0); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) +%{ + match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); + + format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + if (src1 == dst) { + __ bstrins_d(dst, src2, 63, 32); + } else if (src2 == dst) { + __ slli_d(dst, dst, 32); + __ bstrins_d(dst, src1, 31, 0); + } else { + __ bstrpick_d(dst, src1, 31, 0); + __ bstrins_d(dst, src2, 63, 32); + } + %} + ins_pipe(ialu_regI_regI); +%} + +// Zero-extend convert int to long +instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L src) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ bstrpick_d(dst, src, 31, 0); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ bstrpick_d(dst, src, 31, 0); + %} + ins_pipe(ialu_regI_regI); +%} + +// Match loading integer and casting it to unsigned int in long register. +// LoadI + ConvI2L + AndL 0xffffffff. 
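+// In Java source this is the usual unsigned-int widening idiom (illustrative
+// example; variable names are not significant):
+//   int  x = a[i];
+//   long u = x & 0xFFFFFFFFL;   // ideal graph: AndL (ConvI2L (LoadI mem)) 0xffffffff
+// The two rules below cover both operand orders of the AndL, so the whole pattern
+// collapses into a zero-extending ld_wu load instead of a sign-extending load
+// followed by a separate mask.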
+instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + + format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe(ialu_loadI); +%} + +instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL mask (ConvI2L (LoadI mem)))); + + format %{ "ld_wu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} + ins_encode %{ + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); + %} + ins_pipe(ialu_loadI); +%} + + +// ============================================================================ +// Safepoint Instruction +instruct safePoint_poll_reg(mRegP poll) %{ + match(SafePoint poll); + predicate(false); + effect(USE poll); + + ins_cost(125); + format %{ "Safepoint @ [$poll] : poll for GC @ safePoint_poll_reg" %} + + ins_encode %{ + Register poll_reg = $poll$$Register; + + __ block_comment("Safepoint:"); + __ relocate(relocInfo::poll_type); + __ ld_w(AT, poll_reg, 0); + %} + + ins_pipe( ialu_storeI ); +%} + +instruct safePoint_poll() %{ + match(SafePoint); + + ins_cost(105); + format %{ "poll for GC @ safePoint_poll" %} + + ins_encode %{ + __ block_comment("Safepoint:"); + __ li(T4, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_type); + __ ld_w(AT, T4, 0); + %} + + ins_pipe( ialu_storeI ); +%} + +//----------Arithmetic Conversion Instructions--------------------------------- + +instruct roundFloat_nop(regF dst) +%{ + match(Set dst (RoundFloat dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +instruct roundDouble_nop(regD dst) +%{ + match(Set dst (RoundDouble dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +//---------- Zeros Count Instructions ------------------------------------------ +// CountLeadingZerosINode CountTrailingZerosINode +instruct countLeadingZerosI(mRegI dst, mRegI src) %{ + match(Set dst (CountLeadingZerosI src)); + + format %{ "clz_w $dst, $src\t# count leading zeros (int)" %} + ins_encode %{ + __ clz_w($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countLeadingZerosL(mRegI dst, mRegL src) %{ + match(Set dst (CountLeadingZerosL src)); + + format %{ "clz_d $dst, $src\t# count leading zeros (long)" %} + ins_encode %{ + __ clz_d($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosI(mRegI dst, mRegI src) %{ + match(Set dst (CountTrailingZerosI src)); + + format %{ "ctz_w $dst, $src\t# count trailing zeros (int)" %} + ins_encode %{ + __ ctz_w($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosL(mRegI dst, mRegL src) %{ + match(Set dst (CountTrailingZerosL src)); + + format %{ "ctz_d $dst, $src\t# count trailing zeros (long)" %} + ins_encode %{ + __ ctz_d($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// ====================VECTOR INSTRUCTIONS===================================== + +// --------------------------------- Load ------------------------------------- + +instruct loadV16(vecX dst, memory mem) %{ + 
predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + format %{ "vload $dst, $mem\t# @loadV16" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORX); + %} + ins_pipe( pipe_slow ); +%} + +instruct loadV32(vecY dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 32); + match(Set dst (LoadVector mem)); + format %{ "xvload $dst, $mem\t# @loadV32" %} + ins_encode %{ + __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_VECTORY); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- Store ------------------------------------ + +instruct storeV16(memory mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + format %{ "vstore $src, $mem\t# @storeV16" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORX); + %} + ins_pipe( pipe_slow ); +%} + +instruct storeV32(memory mem, vecY src) %{ + predicate(n->as_StoreVector()->memory_size() == 32); + match(Set mem (StoreVector mem src)); + format %{ "xvstore $src, $mem\t# @storeV32" %} + ins_encode %{ + __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_VECTORY); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------- Replicate ---------------------------------- + +instruct repl16B(vecX dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB src)); + format %{ "vreplgr2vr.b $dst, $src\t# @repl16B" %} + ins_encode %{ + __ vreplgr2vr_b($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl16B_imm(vecX dst, immI_M128_255 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateB imm)); + format %{ "vldi $dst, $imm\t# @repl16B_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, ($imm$$constant & 0xff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8S(vecX dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS src)); + format %{ "vreplgr2vr.h $dst, $src\t# @repl8S" %} + ins_encode %{ + __ vreplgr2vr_h($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8S_imm(vecX dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateS imm)); + format %{ "vldi $dst, $imm\t# @repl8S_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4I(vecX dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + format %{ "vreplgr2vr.w $dst, $src\t# @repl4I" %} + ins_encode %{ + __ vreplgr2vr_w($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4I_imm(vecX dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI imm)); + format %{ "vldi $dst, $imm\t# @repl4I_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl2L(vecX dst, mRegL src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + format %{ "vreplgr2vr.d $dst, $src\t# @repl2L" %} + ins_encode %{ + __ vreplgr2vr_d($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow 
); +%} + +instruct repl2L_imm(vecX dst, immL10 imm) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL imm)); + format %{ "vldi $dst, $imm\t# @repl2L_imm" %} + ins_encode %{ + __ vldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4F(vecX dst, regF src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + format %{ "vreplvei.w $dst, $src, 0\t# @repl4F" %} + ins_encode %{ + __ vreplvei_w($dst$$FloatRegister, $src$$FloatRegister, 0); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl2D(vecX dst, regD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + format %{ "vreplvei.d $dst, $src, 0\t# @repl2D" %} + ins_encode %{ + __ vreplvei_d($dst$$FloatRegister, $src$$FloatRegister, 0); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl32B(vecY dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB src)); + format %{ "xvreplgr2vr.b $dst, $src\t# @repl32B" %} + ins_encode %{ + __ xvreplgr2vr_b($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl32B_imm(vecY dst, immI_M128_255 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (ReplicateB imm)); + format %{ "xvldi $dst, $imm\t# @repl32B_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, ($imm$$constant & 0xff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl16S(vecY dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS src)); + format %{ "xvreplgr2vr.h $dst, $src\t# @repl16S" %} + ins_encode %{ + __ xvreplgr2vr_h($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl16S_imm(vecY dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (ReplicateS imm)); + format %{ "xvldi $dst, $imm\t# @repl16S_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, (0b001 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8I(vecY dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI src)); + format %{ "xvreplgr2vr.w $dst, $src\t# @repl8I" %} + ins_encode %{ + __ xvreplgr2vr_w($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8I_imm(vecY dst, immI10 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateI imm)); + format %{ "xvldi $dst, $imm\t# @repl8I_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, (0b010 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4L(vecY dst, mRegL src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL src)); + format %{ "xvreplgr2vr.d $dst, $src\t# @repl4L" %} + ins_encode %{ + __ xvreplgr2vr_d($dst$$FloatRegister, $src$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4L_imm(vecY dst, immL10 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateL imm)); + format %{ "xvldi $dst, $imm\t# @repl4L_imm" %} + ins_encode %{ + __ xvldi($dst$$FloatRegister, (0b011 << 10 ) | ($imm$$constant & 0x3ff)); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl8F(vecY dst, regF src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateF src)); + format %{ "xvreplve0.w $dst, $src\t# @repl8F" %} + ins_encode %{ + __ xvreplve0_w($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct repl4D(vecY dst, regD src) %{ + 
predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateD src)); + format %{ "xvreplve0.d $dst, $src\t# @repl4D" %} + ins_encode %{ + __ xvreplve0_d($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ADD -------------------------------------- + +instruct add16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + format %{ "vadd.b $dst, $src1, $src2\t# @add16B" %} + ins_encode %{ + __ vadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add16B_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src (ReplicateB imm))); + format %{ "vaddi.bu $dst, $src, $imm\t# @add16B_imm" %} + ins_encode %{ + __ vaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + format %{ "vadd.h $dst, $src1, $src2\t# @add8S" %} + ins_encode %{ + __ vadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8S_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src (ReplicateS imm))); + format %{ "vaddi.hu $dst, $src, $imm\t# @add8S_imm" %} + ins_encode %{ + __ vaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src1 src2)); + format %{ "vadd.w $dst, $src1, src2\t# @add4I" %} + ins_encode %{ + __ vadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4I_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src (ReplicateI imm))); + format %{ "vaddi.wu $dst, $src, $imm\t# @add4I_imm" %} + ins_encode %{ + __ vaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src1 src2)); + format %{ "vadd.d $dst, $src1, $src2\t# @add2L" %} + ins_encode %{ + __ vadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add2L_imm(vecX dst, vecX src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src (ReplicateL imm))); + format %{ "vaddi.du $dst, $src, $imm\t# @add2L_imm" %} + ins_encode %{ + __ vaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVF src1 src2)); + format %{ "vfadd.s $dst, $src1, $src2\t# @add4F" %} + ins_encode %{ + __ vfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVD src1 src2)); + format %{ "vfadd.d $dst, $src1, $src2\t# @add2D" %} + ins_encode %{ + __ vfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add32B(vecY dst, 
vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AddVB src1 src2)); + format %{ "xvadd.b $dst, $src1, $src2\t# @add32B" %} + ins_encode %{ + __ xvadd_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add32B_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AddVB src (ReplicateB imm))); + format %{ "xvaddi.bu $dst, $src, $imm\t# @add32B_imm" %} + ins_encode %{ + __ xvaddi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVS src1 src2)); + format %{ "xvadd.h $dst, $src1, $src2\t# @add16S" %} + ins_encode %{ + __ xvadd_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add16S_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVS src (ReplicateS imm))); + format %{ "xvaddi.hu $dst, $src, $imm\t# @add16S_imm" %} + ins_encode %{ + __ xvaddi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVI src1 src2)); + format %{ "xvadd.wu $dst, $src1, $src2\t# @add8I" %} + ins_encode %{ + __ xvadd_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8I_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVI src (ReplicateI imm))); + format %{ "xvaddi.wu $dst, $src, $imm\t# @add8I_imm" %} + ins_encode %{ + __ xvaddi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVL src1 src2)); + format %{ "xvadd.d $dst, $src1, $src2\t# @add4L" %} + ins_encode %{ + __ xvadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4L_imm(vecY dst, vecY src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVL src (ReplicateL imm))); + format %{ "xvaddi.du $dst, $src, $imm\t# @add4L_imm" %} + ins_encode %{ + __ xvaddi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct add8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVF src1 src2)); + format %{ "xvfadd.s $dst, $src1, $src2\t# @add8F" %} + ins_encode %{ + __ xvfadd_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct add4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVD src1 src2)); + format %{ "xvfadd.d $dst, $src1, $src2\t# @add4D" %} + ins_encode %{ + __ xvfadd_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- SUB -------------------------------------- + +instruct sub16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + format %{ "vsub.b $dst, $src1, $src2\t# @sub16B" %} + ins_encode %{ + __ vsub_b($dst$$FloatRegister, $src1$$FloatRegister, 
$src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub16B_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src (ReplicateB imm))); + format %{ "vsubi.bu $dst, $src, $imm\t# @sub16B_imm" %} + ins_encode %{ + __ vsubi_bu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + format %{ "vsub.h $dst, $src1, $src2\t# @sub8S" %} + ins_encode %{ + __ vsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8S_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src (ReplicateS imm))); + format %{ "vsubi.hu $dst, $src, $imm\t# @sub8S_imm" %} + ins_encode %{ + __ vsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src1 src2)); + format %{ "vsub.w $dst, $src1, src2\t# @sub4I" %} + ins_encode %{ + __ vsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4I_imm(vecX dst, vecX src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src (ReplicateI imm))); + format %{ "vsubi.wu $dst, $src, $imm\t# @sub4I_imm" %} + ins_encode %{ + __ vsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src1 src2)); + format %{ "vsub.d $dst, $src1, $src2\t# @sub2L" %} + ins_encode %{ + __ vsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub2L_imm(vecX dst, vecX src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src (ReplicateL imm))); + format %{ "vsubi.du $dst, $src, $imm\t# @sub2L_imm" %} + ins_encode %{ + __ vsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVF src1 src2)); + format %{ "vfsub.s $dst, $src1, $src2\t# @sub4F" %} + ins_encode %{ + __ vfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVD src1 src2)); + format %{ "vfsub.d $dst, $src1, $src2\t# @sub2D" %} + ins_encode %{ + __ vfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub32B(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (SubVB src1 src2)); + format %{ "xvsub.b $dst, $src1, $src2\t# @sub32B" %} + ins_encode %{ + __ xvsub_b($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub32B_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (SubVB src (ReplicateB imm))); + format %{ "xvsubi.bu $dst, $src, $imm\t# @sub32B_imm" %} + ins_encode %{ + __ xvsubi_bu($dst$$FloatRegister, $src$$FloatRegister, 
$imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVS src1 src2)); + format %{ "xvsub.h $dst, $src1, $src2\t# @sub16S" %} + ins_encode %{ + __ xvsub_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub16S_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVS src (ReplicateS imm))); + format %{ "xvsubi.hu $dst, $src, $imm\t# @sub16S_imm" %} + ins_encode %{ + __ xvsubi_hu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVI src1 src2)); + format %{ "xvsub.w $dst, $src1, $src2\t# @sub8I" %} + ins_encode %{ + __ xvsub_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8I_imm(vecY dst, vecY src, immIU5 imm) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVI src (ReplicateI imm))); + format %{ "xvsubi.wu $dst, $src, $imm\t# @sub8I_imm" %} + ins_encode %{ + __ xvsubi_wu($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4L(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVL src1 src2)); + format %{ "xvsub.d $dst, $src1, $src2\t# @sub4L" %} + ins_encode %{ + __ xvsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4L_imm(vecY dst, vecY src, immLU5 imm) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVL src (ReplicateL imm))); + format %{ "xvsubi.du $dst, $src, $imm\t# @sub4L_imm" %} + ins_encode %{ + __ xvsubi_du($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVF src1 src2)); + format %{ "xvfsub.s $dst, $src1, $src2\t# @sub8F" %} + ins_encode %{ + __ xvfsub_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sub4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVD src1 src2)); + format %{ "xvfsub.d $dst,$src1,$src2\t# @sub4D" %} + ins_encode %{ + __ xvfsub_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- MUL -------------------------------------- +instruct mul8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVS src1 src2)); + format %{ "vmul.h $dst, $src1, $src2\t# @mul8S" %} + ins_encode %{ + __ vmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVI src1 src2)); + format %{ "vmul.w $dst, $src1, $src2\t# @mul4I" %} + ins_encode %{ + __ vmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVF src1 src2)); + format %{ "vfmul.s $dst, $src1, $src2\t# @mul4F" %} + ins_encode %{ + __ 
vfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVD src1 src2)); + format %{ "vfmul.d $dst, $src1, $src2\t# @mul2D" %} + ins_encode %{ + __ vfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul16S(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (MulVS src1 src2)); + format %{ "xvmul.h $dst, $src1, $src2\t# @mul16S" %} + ins_encode %{ + __ xvmul_h($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul8I(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVI src1 src2)); + format %{ "xvmul.w $dst, $src1, $src2\t# @mul8I" %} + ins_encode %{ + __ xvmul_w($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVF src1 src2)); + format %{ "xvfmul.s $dst, $src1, $src2\t# @mul8F" %} + ins_encode %{ + __ xvfmul_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct mul4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVD src1 src2)); + format %{ "xvfmul.d $dst, $src1, $src2\t# @mul4D" %} + ins_encode %{ + __ xvfmul_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- DIV -------------------------------------- +instruct div4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVF src1 src2)); + format %{ "vfdiv.s $dst, $src1, $src2\t# @div4F" %} + ins_encode %{ + __ vfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct div2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVD src1 src2)); + format %{ "vfdiv.d $dst, $src1, $src2\t# @div2D" %} + ins_encode %{ + __ vfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct div8F(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (DivVF src1 src2)); + format %{ "xvfdiv.s $dst, $src1, $src2\t# @div8F" %} + ins_encode %{ + __ xvfdiv_s($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct div4D(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVD src1 src2)); + format %{ "xvfdiv.d $dst, $src1, $src2\t# @div4D" %} + ins_encode %{ + __ xvfdiv_d($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------ Shift --------------------------------------- + +instruct shiftcntX(vecX dst, mRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "vreplgr2vr.b $dst, $cnt\t# @shiftcntX" %} + ins_encode %{ + __ vreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); + %} + ins_pipe( pipe_slow ); +%} + +instruct shiftcntY(vecY dst, mRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 
32); + match(Set dst (LShiftCntV cnt)); + match(Set dst (RShiftCntV cnt)); + format %{ "xvreplgr2vr.b $dst, $cnt\t# @shiftcntY" %} + ins_encode %{ + __ xvreplgr2vr_b($dst$$FloatRegister, $cnt$$Register); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------------ LeftShift ----------------------------------- + +instruct sll16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll16B" %} + ins_encode %{ + __ vsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + format %{ "vslli.b $dst, $src, $shift\t# @sll16B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "vsll $dst, $src, $shift\t# TEMP($tmp) @sll8S" %} + ins_encode %{ + __ vsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + format %{ "vslli.h $dst, $src, $shift\t# @sll8S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + format %{ "vsll.w $dst, $src, $shift\t# @sll4I" %} + ins_encode %{ + __ vsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVI src shift)); + format %{ "vslli.w $dst, $src, $shift\t# @sll4I_imm" %} + ins_encode %{ + __ vslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + format %{ "vsll.d $dst, $src, $shift\t# @sll2L" %} + ins_encode %{ + __ vsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + format %{ "vslli.d $dst, $src, $shift\t# @sll2L_imm" %} + ins_encode %{ + __ vslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll32B(vecY dst, vecY src, vecY 
shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (LShiftVB src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll32B" %} + ins_encode %{ + __ xvsll_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll32B_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (LShiftVB src shift)); + format %{ "xvslli.b $dst, $src, $shift\t# @sll32B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvslli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "xvsll $dst, $src, $shift\t# TEMP($tmp) @sll16S" %} + ins_encode %{ + __ xvsll_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll16S_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVS src shift)); + format %{ "xvslli.h $dst, $src, $shift\t# @sll16S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvslli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8I(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVI src shift)); + format %{ "xvsll.w $dst, $src, $shift\t# @sll8I" %} + ins_encode %{ + __ xvsll_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll8I_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVI src shift)); + format %{ "xvslli.w $dst, $src, $shift\t# @sll8I_imm" %} + ins_encode %{ + __ xvslli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4L(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVL src shift)); + format %{ "xvsll.d $dst, $src, $shift\t# @sll4L" %} + ins_encode %{ + __ xvsll_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sll4L_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVL src shift)); + format %{ "xvslli.d $dst, $src, $shift\t# @sll4L_imm" %} + ins_encode %{ + __ xvslli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// ----------------------- LogicalRightShift ---------------------------------- + +instruct srl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVB src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl16B" %} + ins_encode %{ + __ vsrl_b($tmp$$FloatRegister, 
$src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVB src shift)); + format %{ "vsrli.b $dst, $src, $shift\t# @srl16B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "vsrl $dst, $src, $shift\t# TEMP($tmp) @srl8S" %} + ins_encode %{ + __ vsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ vslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ vand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + format %{ "vsrli.h $dst, $src, $shift\t# @srl8S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ vxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ vsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + format %{ "vsrl.w $dst, $src, $shift\t# @srl4I" %} + ins_encode %{ + __ vsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVI src shift)); + format %{ "vsrli.w $dst, $src, $shift\t# @srl4I_imm" %} + ins_encode %{ + __ vsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + format %{ "vsrl.d $dst, $src, $shift\t# @srl2L" %} + ins_encode %{ + __ vsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + format %{ "vsrli.d $dst, $src, $shift\t# @srl2L_imm" %} + ins_encode %{ + __ vsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (URShiftVB src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl32B" %} + ins_encode %{ + __ xvsrl_b($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x8); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl32B_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (URShiftVB src 
shift)); + format %{ "xvsrli.b $dst, $src, $shift\t# @srl32B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvsrli_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + effect(TEMP dst, TEMP tmp); + format %{ "xvsrl $dst, $src, $shift\t# TEMP($tmp) @srl16S" %} + ins_encode %{ + __ xvsrl_h($tmp$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + __ xvslti_bu($dst$$FloatRegister, $shift$$FloatRegister, 0x10); + __ xvand_v($dst$$FloatRegister, $dst$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl16S_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (URShiftVS src shift)); + format %{ "xvsrli.h $dst, $src, $shift\t# @srl16S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ xvxor_v($dst$$FloatRegister, $dst$$FloatRegister, $dst$$FloatRegister); + } else { + __ xvsrli_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8I(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVI src shift)); + format %{ "xvsrl.w $dst, $src, $shift\t# @srl8I" %} + ins_encode %{ + __ xvsrl_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl8I_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVI src shift)); + format %{ "xvsrli.w $dst, $src, $shift\t# @srl8I_imm" %} + ins_encode %{ + __ xvsrli_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4L(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVL src shift)); + format %{ "xvsrl.d $dst, $src, $shift\t# @srl4L" %} + ins_encode %{ + __ xvsrl_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct srl4L_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVL src shift)); + format %{ "xvsrli.d $dst, $src, $shift\t# @srl4L_imm" %} + ins_encode %{ + __ xvsrli_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// ------------------------- ArithmeticRightShift ----------------------------- + +instruct sra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + effect(TEMP tmp); + format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra16B" %} + ins_encode %{ + __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); + __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); + __ vsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + format %{ "vsrai.b $dst, $src, $shift\t# @sra16B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); + } else { + __ vsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + 
%} + ins_pipe( pipe_slow ); +%} + +instruct sra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + effect(TEMP tmp); + format %{ "vsra $dst, $src, $shift\t# TEMP($tmp) @sra8S" %} + ins_encode %{ + __ vslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); + __ vorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); + __ vsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + format %{ "vsrai.h $dst, $src, $shift\t# @sra8S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); + } else { + __ vsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + format %{ "vsra.w $dst, $src, $shift\t# @sra4I" %} + ins_encode %{ + __ vsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + format %{ "vsrai.w $dst, $src, $shift\t# @sra4I_imm" %} + ins_encode %{ + __ vsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + format %{ "vsra.d $dst, $src, $shift\t# @sra2L" %} + ins_encode %{ + __ vsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + format %{ "vsrai.d $dst, $src, $shift\t# @sra2L_imm" %} + ins_encode %{ + __ vsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra32B(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (RShiftVB src shift)); + effect(TEMP tmp); + format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra32B" %} + ins_encode %{ + __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x8); + __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, $tmp$$FloatRegister); + __ xvsra_b($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra32B_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (RShiftVB src shift)); + format %{ "xvsrai.b $dst, $src, $shift\t# @sra32B_imm" %} + ins_encode %{ + if ($shift$$constant >= 8) { + __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, 7); + } else { + __ xvsrai_b($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sra16S(vecY dst, vecY src, vecY shift, vecY tmp) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + effect(TEMP tmp); + format %{ "xvsra $dst, $src, $shift\t# TEMP($tmp) @sra16S" %} + ins_encode %{ + __ xvslti_bu($tmp$$FloatRegister, $shift$$FloatRegister, 0x10); + __ xvorn_v($tmp$$FloatRegister, $shift$$FloatRegister, 
$tmp$$FloatRegister); + __ xvsra_h($dst$$FloatRegister, $src$$FloatRegister, $tmp$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra16S_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVS src shift)); + format %{ "xvsrai.h $dst, $src, $shift\t# @sra16S_imm" %} + ins_encode %{ + if ($shift$$constant >= 16) { + __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, 15); + } else { + __ xvsrai_h($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct sra8I(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVI src shift)); + format %{ "xvsra.w $dst, $src, $shift\t# @sra8I" %} + ins_encode %{ + __ xvsra_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra8I_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVI src shift)); + format %{ "xvsrai.w $dst, $src, $shift\t# @sra8I_imm" %} + ins_encode %{ + __ xvsrai_w($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4L(vecY dst, vecY src, vecY shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVL src shift)); + format %{ "xvsra.d $dst, $src, $shift\t# @sra4L" %} + ins_encode %{ + __ xvsra_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct sra4L_imm(vecY dst, vecY src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVL src shift)); + format %{ "xvsrai.d $dst, $src, $shift\t# @sra4L_imm" %} + ins_encode %{ + __ xvsrai_d($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- AND -------------------------------------- + +instruct andV16(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src1 src2)); + format %{ "vand.v $dst, $src1, $src2\t# @andV16" %} + ins_encode %{ + __ vand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct and16B_imm(vecX dst, vecX src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AndV src (ReplicateB imm))); + format %{ "vandi.b $dst, $src, $imm\t# @and16B_imm" %} + ins_encode %{ + __ vandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct andV32(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (AndV src1 src2)); + format %{ "xvand.v $dst, $src1, $src2\t# @andV32" %} + ins_encode %{ + __ xvand_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct and32B_imm(vecY dst, vecY src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (AndV src (ReplicateB imm))); + format %{ "xvandi.b $dst, $src, $imm\t# @and32B_imm" %} + ins_encode %{ + __ xvandi_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- OR --------------------------------------- + +instruct orV16(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src1 src2)); + format %{ "vor.v $dst, $src1, $src2\t# @orV16" %} + ins_encode %{ + __ 
vor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct or16B_imm(vecX dst, vecX src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (OrV src (ReplicateB imm))); + format %{ "vori.b $dst, $src, $imm\t# @or16B_imm" %} + ins_encode %{ + __ vori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct orV32(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (OrV src1 src2)); + format %{ "xvor.v $dst, $src1, $src2\t# @orV32" %} + ins_encode %{ + __ xvor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct or32B_imm(vecY dst, vecY src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (OrV src (ReplicateB imm))); + format %{ "xvori.b $dst, $src, $imm\t# @or32B_imm" %} + ins_encode %{ + __ xvori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- XOR -------------------------------------- + +instruct xorV16(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV src1 src2)); + format %{ "vxor.v $dst, $src1, $src2\t# @xorV16" %} + ins_encode %{ + __ vxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct xor16B_imm(vecX dst, vecX src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (XorV src (ReplicateB imm))); + format %{ "vxori.b $dst, $src, $imm\t# @xor16B_imm" %} + ins_encode %{ + __ vxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct xorV32(vecY dst, vecY src1, vecY src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (XorV src1 src2)); + format %{ "xvxor.v $dst, $src1, $src2\t# @xorV32" %} + ins_encode %{ + __ xvxor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct xor32B_imm(vecY dst, vecY src, immIU8 imm) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (XorV src (ReplicateB imm))); + format %{ "xvxori.b $dst, $src, $imm\t# @xor32B_imm" %} + ins_encode %{ + __ xvxori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- NOR -------------------------------------- + +instruct norV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV (OrV src1 src2) (ReplicateB m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); + format %{ "vnor.v $dst, $src1, $src2\t# @norV16" %} + ins_encode %{ + __ vnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct nor16B_imm(vecX dst, vecX src, immIU8 imm, immI_M1 m1) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); + format %{ "vnori.b $dst, $src, $imm\t# @nor16B_imm" %} + ins_encode %{ + __ vnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +instruct norV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (XorV (OrV src1 src2) 
(ReplicateB m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateS m1))); + match(Set dst (XorV (OrV src1 src2) (ReplicateI m1))); + format %{ "xvnor.v $dst, $src1, $src2\t# @norV32" %} + ins_encode %{ + __ xvnor_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct nor32B_imm(vecY dst, vecY src, immIU8 imm, immI_M1 m1) %{ + predicate(n->as_Vector()->length() == 32); + match(Set dst (XorV (OrV src (ReplicateB imm)) (ReplicateB m1))); + format %{ "xvnori.b $dst, $src, $imm\t# @nor32B_imm" %} + ins_encode %{ + __ xvnori_b($dst$$FloatRegister, $src$$FloatRegister, $imm$$constant); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ANDN ------------------------------------- + +instruct andnV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); + format %{ "vandn.v $dst, $src1, $src2\t# @andnV16" %} + ins_encode %{ + __ vandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct andnV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (AndV src2 (XorV src1 (ReplicateB m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateS m1)))); + match(Set dst (AndV src2 (XorV src1 (ReplicateI m1)))); + format %{ "xvandn.v $dst, $src1, $src2\t# @andnV32" %} + ins_encode %{ + __ xvandn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// --------------------------------- ORN -------------------------------------- + +instruct ornV16(vecX dst, vecX src1, vecX src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); + format %{ "vorn.v $dst, $src1, $src2\t# @ornV16" %} + ins_encode %{ + __ vorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct ornV32(vecY dst, vecY src1, vecY src2, immI_M1 m1) %{ + predicate(n->as_Vector()->length_in_bytes() == 32); + match(Set dst (OrV src1 (XorV src2 (ReplicateB m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateS m1)))); + match(Set dst (OrV src1 (XorV src2 (ReplicateI m1)))); + format %{ "xvorn.v $dst, $src1, $src2\t# @ornV32" %} + ins_encode %{ + __ xvorn_v($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +// +// peepmatch ( root_instr_name [preceeding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] 
); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. +// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == EAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(eRegI dst, eRegI src) %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceeded by register-register move +// peepmatch ( incI_eReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// Implementation no longer uses movX instructions since +// machine-independent system no longer uses CopyX nodes. +// +// peephole %{ +// peepmatch ( incI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( decI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addI_eReg_imm movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addP_eReg_imm movP ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); +// %} + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, eRegI src) %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(eRegI dst, memory mem) %{ +// match(Set dst (LoadI mem)); +// %} +// +//peephole %{ +// peepmatch ( loadI storeI ); +// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); +// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); +//%} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. + diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp new file mode 100644 index 00000000000..89295343ce0 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.cpp @@ -0,0 +1,3895 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#ifdef COMPILER2 +#include "opto/compile.hpp" +#include "opto/node.hpp" +#endif + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Implementation of MacroAssembler + +intptr_t MacroAssembler::i[32] = {0}; +float MacroAssembler::f[32] = {0.0}; + +void MacroAssembler::print(outputStream *s) { + unsigned int k; + for(k=0; k<32; k++) { + s->print_cr("i%d = 0x%.16lx", k, i[k]); + } + s->cr(); + + for(k=0; k<32; k++) { + s->print_cr("f%d = %f", k, f[k]); + } + s->cr(); +} + +int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } +int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } + +void MacroAssembler::save_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ st_w (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ fst_s (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + +void MacroAssembler::restore_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ ld_w (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ fld_s (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + + +void MacroAssembler::pd_patch_instruction(address branch, address target) { + jint& stub_inst = *(jint*)branch; + jint* pc = (jint*)branch; + + if (high(stub_inst, 7) == pcaddu18i_op) { + // far: + // pcaddu18i reg, 
si20 + // jirl r0, reg, si18 + + assert(high(pc[1], 6) == jirl_op, "Not a branch label patch"); + jlong offs = target - branch; + CodeBuffer cb(branch, 2 * BytesPerInstWord); + MacroAssembler masm(&cb); + if (reachable_from_branch_short(offs)) { + // convert far to short +#define __ masm. + __ b(target); + __ nop(); +#undef __ + } else { + masm.patchable_jump_far(R0, offs); + } + return; + } else if (high(stub_inst, 7) == pcaddi_op) { + // see MacroAssembler::set_last_Java_frame: + // pcaddi reg, si20 + + jint offs = (target - branch) >> 2; + guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); + CodeBuffer cb(branch, 1 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.pcaddi(as_Register(low(stub_inst, 5)), offs); + return; + } + + stub_inst = patched_branch(target - branch, stub_inst, 0); +} + +bool MacroAssembler::reachable_from_branch_short(jlong offs) { + if (ForceUnreachable) { + return false; + } + return is_simm(offs >> 2, 26); +} + +void MacroAssembler::patchable_jump_far(Register ra, jlong offs) { + jint si18, si20; + guarantee(is_simm(offs, 38), "Not signed 38-bit offset"); + split_simm38(offs, si18, si20); + pcaddu18i(T4, si20); + jirl(ra, T4, si18); +} + +void MacroAssembler::patchable_jump(address target, bool force_patchable) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(target) != NULL, + "destination of jump not found in code cache"); + if (force_patchable || patchable_branches()) { + jlong offs = target - pc(); + if (reachable_from_branch_short(offs)) { // Short jump + b(offset26(target)); + nop(); + } else { // Far jump + patchable_jump_far(R0, offs); + } + } else { // Real short jump + b(offset26(target)); + } +} + +void MacroAssembler::patchable_call(address target, address call_site) { + jlong offs = target - (call_site ? call_site : pc()); + if (reachable_from_branch_short(offs - BytesPerInstWord)) { // Short call + nop(); + bl((offs - BytesPerInstWord) >> 2); + } else { // Far call + patchable_jump_far(RA, offs); + } +} + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. + +address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type + || entry.rspec().type() == relocInfo::opt_virtual_call_type + || entry.rspec().type() == relocInfo::static_call_type + || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + + // We need a trampoline if branches are far. + if (far_branches()) { + bool in_scratch_emit_size = false; +#ifdef COMPILER2 + // We don't want to emit a trampoline if C2 is generating dummy + // code during its branch shortening phase. + CompileTask* task = ciEnv::current()->task(); + in_scratch_emit_size = + (task != NULL && is_c2_compile(task->comp_level()) && + Compile::current()->in_scratch_emit_size()); +#endif + if (!in_scratch_emit_size) { + address stub = emit_trampoline_stub(offset(), entry.target()); + if (stub == NULL) { + return NULL; // CodeCache is full + } + } + } + + if (cbuf) cbuf->set_insts_mark(); + relocate(entry.rspec()); + if (!far_branches()) { + bl(entry.target()); + } else { + bl(pc()); + } + // just need to return a non-null address + return pc(); +} + +// Emit a trampoline stub for a call to a target which is too far away. 
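// A trampoline is needed at all because of the branch ranges used above: b/bl
// encode a signed 26-bit word offset (roughly +/-128 MiB), while the patchable
// far form emitted by patchable_jump_far() (pcaddu18i + jirl) covers a signed
// 38-bit byte offset. Below is a minimal standalone sketch of the range check
// mirrored by reachable_from_branch_short() above, ignoring the ForceUnreachable
// flag; fits_short_branch is a hypothetical helper name, not part of this patch.

#include <cstdint>

inline bool fits_short_branch(int64_t byte_offs) {
  const int64_t word_offs = byte_offs >> 2;                     // instructions are 4 bytes
  return word_offs >= -(1LL << 25) && word_offs < (1LL << 25);  // signed 26-bit immediate
}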
+// +// code sequences: +// +// call-site: +// branch-and-link to <destination> or <trampoline stub> +// +// Related trampoline stub for this call site in the stub section: +// load the call target from the constant pool +// branch (RA still points to the call site above) + +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { + // Start the stub + address stub = start_a_stub(NativeInstruction::nop_instruction_size + + NativeCallTrampolineStub::instruction_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } + + // Create a trampoline stub relocation which relates this trampoline stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section. + align(wordSize); + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); + const int stub_start_offset = offset(); + + // Now, create the trampoline stub's code: + // - load the call + // - call + pcaddi(T4, 0); + ld_d(T4, T4, 16); + jr(T4); + nop(); //align + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); + emit_int64((int64_t)dest); + + const address stub_start_addr = addr_at(stub_start_offset); + + NativeInstruction* ni = nativeInstruction_at(stub_start_addr); + assert(ni->is_NativeCallTrampolineStub_at(), "doesn't look like a trampoline"); + + end_a_stub(); + return stub_start_addr; +} + +void MacroAssembler::beq_far(Register rs, Register rt, address entry) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + beq(rs, rt, offset16(entry)); + } else { // Far jump + Label not_jump; + bne(rs, rt, not_jump); + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + beq_far(rs, rt, target(L)); + } else { + Label not_jump; + bne(rs, rt, not_jump); + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, address entry) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + bne(rs, rt, offset16(entry)); + } else { // Far jump + Label not_jump; + beq(rs, rt, not_jump); + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + bne_far(rs, rt, target(L)); + } else { + Label not_jump; + beq(rs, rt, not_jump); + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::blt_far(Register rs, Register rt, address entry, bool is_signed) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + if (is_signed) { + blt(rs, rt, offset16(entry)); + } else { + bltu(rs, rt, offset16(entry)); + } + } else { // Far jump + Label not_jump; + if (is_signed) { + bge(rs, rt, not_jump); + } else { + bgeu(rs, rt, not_jump); + } + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::blt_far(Register rs, Register rt, Label& L, bool is_signed) { + if (L.is_bound()) { + blt_far(rs, rt, target(L), is_signed); + } else { + Label not_jump; + if (is_signed) { + bge(rs, rt, not_jump); + } else { + bgeu(rs, rt, not_jump); + } + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::bge_far(Register rs, Register rt, address entry, bool is_signed) { + if (is_simm16((entry - pc()) >> 2)) { // Short jump + if (is_signed) { + bge(rs, rt, offset16(entry)); + } else { + bgeu(rs, rt, offset16(entry)); + } + } else { // Far jump + Label not_jump; + if (is_signed) { + blt(rs, rt, not_jump); + } else { + bltu(rs, rt, not_jump); + } + b_far(entry); + bind(not_jump); + } +} + +void MacroAssembler::bge_far(Register 
rs, Register rt, Label& L, bool is_signed) { + if (L.is_bound()) { + bge_far(rs, rt, target(L), is_signed); + } else { + Label not_jump; + if (is_signed) { + blt(rs, rt, not_jump); + } else { + bltu(rs, rt, not_jump); + } + b_far(L); + bind(not_jump); + } +} + +void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { + Label not_taken; + bne(rs, rt, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { + Label not_taken; + beq(rs, rt, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::blt_long(Register rs, Register rt, Label& L, bool is_signed) { + Label not_taken; + if (is_signed) { + bge(rs, rt, not_taken); + } else { + bgeu(rs, rt, not_taken); + } + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bge_long(Register rs, Register rt, Label& L, bool is_signed) { + Label not_taken; + if (is_signed) { + blt(rs, rt, not_taken); + } else { + bltu(rs, rt, not_taken); + } + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bc1t_long(Label& L) { + Label not_taken; + bceqz(FCC0, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::bc1f_long(Label& L) { + Label not_taken; + bcnez(FCC0, not_taken); + jmp_far(L); + bind(not_taken); +} + +void MacroAssembler::b_far(Label& L) { + if (L.is_bound()) { + b_far(target(L)); + } else { + L.add_patch_at(code(), locator()); + if (ForceUnreachable) { + patchable_jump_far(R0, 0); + } else { + b(0); + } + } +} + +void MacroAssembler::b_far(address entry) { + jlong offs = entry - pc(); + if (reachable_from_branch_short(offs)) { // Short jump + b(offset26(entry)); + } else { // Far jump + patchable_jump_far(R0, offs); + } +} + +void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { + ldx_d(rt, base, offset); +} + +void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { + stx_d(rt, base, offset); +} + +void MacroAssembler::ld_long(Register rt, Register offset, Register base) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +#if 0 + add_d(AT, base, offset); + ld_long(rt, 0, AT); +#endif +} + +void MacroAssembler::st_long(Register rt, Register offset, Register base) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +#if 0 + add_d(AT, base, offset); + st_long(rt, 0, AT); +#endif +} + +Address MacroAssembler::as_Address(AddressLiteral adr) { + return Address(adr.target(), adr.rspec()); +} + +Address MacroAssembler::as_Address(ArrayAddress adr) { + return Address::make_array(adr); +} + +// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). 
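// The *_far and *_long conditional branches above all share one shape: emit the
// inverted condition as a short-range branch that skips over an unconditional
// far jump. A scalar sketch of bne_long(rs, rt, L) under that reading;
// bne_long_model and the *_pc parameters are hypothetical stand-ins for the
// label targets, not part of this patch.

#include <cstdint>

inline uint64_t bne_long_model(int64_t rs, int64_t rt,
                               uint64_t taken_pc, uint64_t fallthrough_pc) {
  if (rs == rt) {            // beq(rs, rt, not_taken) -- inverted condition
    return fallthrough_pc;   // not_taken: fall through
  }
  return taken_pc;           // jmp_far(L)
}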
+void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { + li(tmp_reg1, inc); + li(tmp_reg2, counter_addr); + amadd_w(R0, tmp_reg1, tmp_reg2); +} + +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + bool need_tmp_reg = false; + if (tmp_reg == noreg) { + need_tmp_reg = true; + tmp_reg = T4; + } + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + ld_ptr(swap_reg, mark_addr); + } + + if (need_tmp_reg) { + push(tmp_reg); + } + move(tmp_reg, swap_reg); + andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); + addi_d(AT, R0, markOopDesc::biased_lock_pattern); + sub_d(AT, AT, tmp_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + + bne(AT, R0, cas_label); + + + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + // Note that because there is no current thread register on LA we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and + // simpler. + st_ptr(swap_reg, saved_mark_addr); + if (need_tmp_reg) { + push(tmp_reg); + } + if (swap_reg_contains_mark) { + null_check_offset = offset(); + } + load_prototype_header(tmp_reg, obj_reg); + xorr(tmp_reg, tmp_reg, swap_reg); + get_thread(swap_reg); + xorr(swap_reg, swap_reg, tmp_reg); + + li(AT, ~((int) markOopDesc::age_mask_in_place)); + andr(swap_reg, swap_reg, AT); + + if (PrintBiasedLockingStatistics) { + Label L; + bne(swap_reg, R0, L); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (need_tmp_reg) { + pop(tmp_reg); + } + beq(swap_reg, R0, done); + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + + li(AT, markOopDesc::biased_lock_mask_in_place); + andr(AT, swap_reg, AT); + bne(AT, R0, try_revoke_bias); + // Biasing is still enabled for this data type. 
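// As a recap of the test emitted a few instructions earlier: XOR-ing the
// prototype header, the mark word and the current thread cancels the fields
// that already match, and masking off the age bits leaves zero exactly when
// the object is still biased to this thread in the current epoch. A standalone
// sketch of that predicate; still_biased_to_self is a hypothetical helper
// name, not part of this patch.

#include <cstdint>

inline bool still_biased_to_self(uintptr_t mark, uintptr_t prototype_header,
                                 uintptr_t thread, uintptr_t age_mask_in_place) {
  // mirrors the load_prototype_header / xorr / get_thread / xorr / andr sequence above
  return (((prototype_header ^ mark) ^ thread) & ~age_mask_in_place) == 0;
}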
See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + + li(AT, markOopDesc::epoch_mask_in_place); + andr(AT,swap_reg, AT); + bne(AT, R0, try_rebias); + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + + ld_ptr(swap_reg, saved_mark_addr); + + li(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + andr(swap_reg, swap_reg, AT); + + if (need_tmp_reg) { + push(tmp_reg); + } + get_thread(tmp_reg); + orr(tmp_reg, tmp_reg, swap_reg); + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + } + b(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + get_thread(swap_reg); + orr(tmp_reg, tmp_reg, swap_reg); + ld_ptr(swap_reg, saved_mark_addr); + + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + } + + b(done); + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. 
We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + ld_ptr(swap_reg, saved_mark_addr); + + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); + if (need_tmp_reg) { + pop(tmp_reg); + } + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + + bind(cas_label); + return null_check_offset; +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ld_d(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); + addi_d(AT, R0, markOopDesc::biased_lock_pattern); + + beq(AT, temp_reg, done); +} + +// the stack pointer adjustment is needed. 
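// The bias checks above and in biased_locking_exit() reduce to a mask-and-compare
// on the low lock bits of the mark word. A standalone sketch of that test; the
// constants follow HotSpot's markOop layout (three low lock bits, biased
// pattern 0b101), and mark_word_is_biased is a hypothetical helper name, not
// part of this patch.

#include <cstdint>

inline bool mark_word_is_biased(uintptr_t mark) {
  const uintptr_t biased_lock_mask_in_place = 0x7;  // andi(temp_reg, temp_reg, mask)
  const uintptr_t biased_lock_pattern       = 0x5;  // addi_d(AT, R0, pattern)
  return (mark & biased_lock_mask_in_place) == biased_lock_pattern;  // beq(AT, temp_reg, done)
}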
see InterpreterMacroAssembler::super_call_VM_leaf +// this method will handle the stack problem, you need not to preserve the stack space for the argument now +void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { + Label L, E; + + assert(number_of_arguments <= 4, "just check"); + + andi(AT, SP, 0xf); + beq(AT, R0, L); + addi_d(SP, SP, -8); + call(entry_point, relocInfo::runtime_call_type); + addi_d(SP, SP, 8); + b(E); + + bind(L); + call(entry_point, relocInfo::runtime_call_type); + bind(E); +} + + +void MacroAssembler::jmp(address entry) { + jlong offs = entry - pc(); + if (reachable_from_branch_short(offs)) { // Short jump + b(offset26(entry)); + } else { // Far jump + patchable_jump_far(R0, offs); + } +} + +void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::none: + jmp(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + patchable_jump(entry); + } + break; + } +} + +void MacroAssembler::jmp_far(Label& L) { + if (L.is_bound()) { + assert(target(L) != NULL, "jmp most probably wrong"); + patchable_jump(target(L), true /* force patchable */); + } else { + L.add_patch_at(code(), locator()); + patchable_jump_far(R0, 0); + } +} + +void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_li52(AT, (long)obj); + st_d(AT, dst); +} + +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_li52(dst, (long)obj); +} + +void MacroAssembler::call(address entry) { + jlong offs = entry - pc(); + if (reachable_from_branch_short(offs)) { // Short call (pc-rel) + bl(offset26(entry)); + } else if (is_simm(offs, 38)) { // Far call (pc-rel) + patchable_jump_far(RA, offs); + } else { // Long call (absolute) + call_long(entry); + } +} + +void MacroAssembler::call(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::none: + call(entry); + break; + case relocInfo::runtime_call_type: + if (!is_simm(entry - pc(), 38)) { + call_long(entry); + break; + } + // fallthrough + default: + { + InstructionMark im(this); + relocate(rtype); + patchable_call(entry); + } + break; + } +} + +void MacroAssembler::call(address entry, RelocationHolder& rh) { + switch (rh.type()) { + case relocInfo::none: + call(entry); + break; + case relocInfo::runtime_call_type: + if (!is_simm(entry - pc(), 38)) { + call_long(entry); + break; + } + // fallthrough + default: + { + InstructionMark im(this); + relocate(rh); + patchable_call(entry); + } + break; + } +} + +void MacroAssembler::call_long(address entry) { + jlong value = (jlong)entry; + lu12i_w(T4, split_low20(value >> 12)); + lu32i_d(T4, split_low20(value >> 32)); + jirl(RA, T4, split_low12(value)); +} + +address MacroAssembler::ic_call(address entry) { + RelocationHolder rh = virtual_call_Relocation::spec(pc()); + patchable_li52(IC_Klass, (long)Universe::non_oop_word()); + assert(entry != NULL, "call most probably wrong"); + InstructionMark im(this); + return trampoline_call(AddressLiteral(entry, rh)); +} + +void MacroAssembler::c2bool(Register r) { + sltu(r, R0, r); +} + +#ifndef PRODUCT +extern "C" void 
findpc(intptr_t x); +#endif + +void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { + if ( ShowMessageBoxOnError ) { + JavaThreadState saved_state = JavaThread::current()->thread_state(); + JavaThread::current()->set_thread_state(_thread_in_vm); + { + // In order to get locks work, we need to fake a in_VM state + ttyLocker ttyl; + ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } + + } + ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); + } + else + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); +} + + +void MacroAssembler::stop(const char* msg) { + li(A0, (long)msg); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + brk(17); +} + +void MacroAssembler::warn(const char* msg) { + pushad(); + li(A0, (long)msg); + push(S2); + li(AT, -(StackAlignmentInBytes)); + move(S2, SP); // use S2 as a sender SP holder + andr(SP, SP, AT); // align stack as required by ABI + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + move(SP, S2); // use S2 as a sender SP holder + pop(S2); + popad(); +} + +void MacroAssembler::increment(Register reg, int imm) { + if (!imm) return; + if (is_simm(imm, 12)) { + addi_d(reg, reg, imm); + } else { + li(AT, imm); + add_d(reg, reg, AT); + } +} + +void MacroAssembler::decrement(Register reg, int imm) { + increment(reg, -imm); +} + +void MacroAssembler::increment(Address addr, int imm) { + if (!imm) return; + assert(is_simm(imm, 12), "must be"); + ld_ptr(AT, addr); + addi_d(AT, AT, imm); + st_ptr(AT, addr); +} + +void MacroAssembler::decrement(Address addr, int imm) { + increment(addr, -imm); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) move(A2, arg_2); + assert(arg_2 != A1, "smashed argument"); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register 
arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + + assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); + + // set last Java frame before call + Label before_call; + bind(before_call); + set_last_Java_frame(java_thread, last_java_sp, FP, before_call); + + // do the call + move(A0, java_thread); + call(entry_point, relocInfo::runtime_call_type); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. 
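A minimal host-side sketch (not from the patch) of the 16-byte stack-alignment handling used by call_VM_leaf_base() earlier and call_VM_helper() below, assuming StackAlignmentInBytes is 16 as the 0xf mask implies: test the low bits of SP, or round SP down with a negative mask.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t sp = 0x7ffffff128ULL;            // fake stack pointer value
  bool aligned = (sp & 0xf) == 0;           // andi(AT, SP, 0xf); beq(AT, R0, L)
  uint64_t rounded = sp & ~(uint64_t)0xf;   // andr(SP, SP, -StackAlignmentInBytes)
  assert((rounded & 0xf) == 0 && rounded <= sp);
  return aligned ? 0 : 1;
}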
+#ifndef OPT_THREAD + get_thread(java_thread); +#else +#ifdef ASSERT + { + Label L; + get_thread(AT); + beq(java_thread, AT, L); + stop("MacroAssembler::call_VM_base: TREG not callee saved?"); + bind(L); + } +#endif +#endif + + // discard thread and arguments + ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // reset last Java frame + reset_last_Java_frame(java_thread, false); + + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + Label L; + ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + beq(AT, R0, L); + li(AT, target(before_call)); + push(AT); + jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + bind(L); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + ld_d(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); + st_d(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); + verify_oop(oop_result); + } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + move(V0, SP); + //we also reserve space for java_thread here + li(AT, -(StackAlignmentInBytes)); + andr(SP, SP, AT); + call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + if (arg_0 != A0) move(A0, arg_0); + call_VM_leaf(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + call_VM_leaf(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); + call_VM_leaf(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 0); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1) { + if (arg_1 != A0) move(A0, arg_1); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2, + Register arg_3) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { +} + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { +} + +void MacroAssembler::null_check(Register reg, int offset) { + if (needs_explicit_null_check(offset)) { + // provoke OS NULL 
exception if reg = NULL by + // accessing M[reg] w/o changing any (non-CC) registers + // NOTE: cmpl is plenty here to provoke a segv + ld_w(AT, reg, 0); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +void MacroAssembler::enter() { + push2(RA, FP); + move(FP, SP); +} + +void MacroAssembler::leave() { + move(SP, FP); + pop2(RA, FP); +} + +void MacroAssembler::build_frame(int framesize) { + assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); + assert(framesize % (2 * wordSize) == 0, "must preserve 2 * wordSize alignment"); + if (Assembler::is_simm(-framesize, 12)) { + addi_d(SP, SP, -framesize); + st_ptr(FP, Address(SP, framesize - 2 * wordSize)); + st_ptr(RA, Address(SP, framesize - 1 * wordSize)); + if (PreserveFramePointer) + addi_d(FP, SP, framesize - 2 * wordSize); + } else { + addi_d(SP, SP, -2 * wordSize); + st_ptr(FP, Address(SP, 0 * wordSize)); + st_ptr(RA, Address(SP, 1 * wordSize)); + if (PreserveFramePointer) + move(FP, SP); + li(SCR1, framesize - 2 * wordSize); + sub_d(SP, SP, SCR1); + } +} + +void MacroAssembler::remove_frame(int framesize) { + assert(framesize >= 2 * wordSize, "framesize must include space for FP/RA"); + assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment"); + if (Assembler::is_simm(framesize, 12)) { + ld_ptr(FP, Address(SP, framesize - 2 * wordSize)); + ld_ptr(RA, Address(SP, framesize - 1 * wordSize)); + addi_d(SP, SP, framesize); + } else { + li(SCR1, framesize - 2 * wordSize); + add_d(SP, SP, SCR1); + ld_ptr(FP, Address(SP, 0 * wordSize)); + ld_ptr(RA, Address(SP, 1 * wordSize)); + addi_d(SP, SP, 2 * wordSize); + } +} + +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T1; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // we must set sp to zero to clear frame + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is possible + // that we need it only for debugging + if(clear_fp) { + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); +} + +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + // we must set sp to zero to clear frame + st_d(R0, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + st_d(R0, thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + st_d(R0, thread, in_bytes(JavaThread::last_Java_pc_offset())); +} + +// Write serialization page so VM thread can do a pseudo remote membar. +// We use the current thread pointer to calculate a thread specific +// offset to write to within the page. This minimizes bus traffic +// due to cache line collision. 
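A rough, self-contained sketch of the per-thread offset that serialize_memory() below derives from the thread pointer; the page size and serialize-page shift count here are assumed example values (the real ones come from os::vm_page_size() and os::get_serialize_page_shift_count()), and the times_2 scaling mirrors the alsl_d in the emitted code.

#include <cstdint>
#include <cstdio>

int main() {
  const int log2_page = 12;                 // assume 4 KiB pages
  const int sps       = 4;                  // assumed serialize-page shift count
  const int lsb = sps + 2;
  const int msb = sps + log2_page - 1;
  uint64_t thread = 0x7f0012345600ULL;      // fake JavaThread* value
  // bstrpick_w extracts bits [lsb, msb]; alsl_d(..., times_2 - 1) scales the field by 2,
  // which keeps the resulting offset inside the serialization page.
  uint64_t field  = (thread >> lsb) & ((1ULL << (msb - lsb + 1)) - 1);
  uint64_t offset = field << 1;
  printf("byte offset into the serialization page: %#llx\n", (unsigned long long)offset);
  return 0;
}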
+void MacroAssembler::serialize_memory(Register thread, Register tmp) { + assert_different_registers(AT, tmp); + juint sps = os::get_serialize_page_shift_count(); + juint lsb = sps + 2; + juint msb = sps + log2_uint(os::vm_page_size()) - 1; + bstrpick_w(AT, thread, msb, lsb); + li(tmp, os::get_memory_serialize_page()); + alsl_d(tmp, AT, tmp, Address::times_2 - 1); + st_w(R0, tmp, 0); +} + +// Calls to C land +// +// When entering C land, the fp, & sp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. +void MacroAssembler::set_last_Java_frame(Register java_thread, + Register last_java_sp, + Register last_java_fp, + Label& last_java_pc) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // last_java_pc + lipc(AT, last_java_pc); + st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset())); + + st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label& last_java_pc) { + set_last_Java_frame(NOREG, last_java_sp, last_java_fp, last_java_pc); +} + +////////////////////////////////////////////////////////////////////////////////// +#if INCLUDE_ALL_GCS + +void MacroAssembler::g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + + assert(thread == TREG, "must be"); + + Label done; + Label runtime; + + assert(pre_val != noreg, "check this code"); + + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + assert(pre_val != V0, "check this code"); + } + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + // Is marking active? + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + ld_w(AT, in_progress); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + ld_b(AT, in_progress); + } + beqz(AT, done); + + // Do we need to load the previous value? + if (obj != noreg) { + load_heap_oop(pre_val, Address(obj, 0)); + } + + // Is the previous value null? + beqz(pre_val, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) 
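The buffer test that follows can be read as the C++ sketch below; SATBQueue and try_enqueue are simplified stand-ins for the VM's PtrQueue layout, not actual HotSpot types.

#include <cstddef>

struct SATBQueue {           // simplified stand-in for the thread's SATB PtrQueue
  size_t index;              // byte index of the next free slot (0 == buffer full)
  void** buf;
};

// Returns false when the buffer is full and the runtime slow path must run.
static bool try_enqueue(SATBQueue* q, void* pre_val) {
  if (q->index == 0) return false;                    // beqz(tmp, runtime)
  q->index -= sizeof(void*);                          // addi_d(tmp, tmp, -wordSize)
  *(void**)((char*)q->buf + q->index) = pre_val;      // stx_d(pre_val, tmp, AT)
  return true;
}

int main() {
  void* slots[4] = {};
  SATBQueue q = { sizeof(slots), slots };
  int dummy;
  return try_enqueue(&q, &dummy) ? 0 : 1;
}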
+ + ld_d(tmp, index); + beqz(tmp, runtime); + + addi_d(tmp, tmp, -1 * wordSize); + st_d(tmp, index); + ld_d(AT, buffer); + + // Record the previous value + stx_d(pre_val, tmp, AT); + b(done); + + bind(runtime); + // save the live input values + if (tosca_live) push(V0); + + if (obj != noreg && obj != V0) push(obj); + + if (pre_val != V0) push(pre_val); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then fp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + if (expand_call) { + assert(pre_val != A1, "smashed arg"); + if (thread != A1) move(A1, thread); + if (pre_val != A0) move(A0, pre_val); + MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); + } else { + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); + } + + // save the live input values + if (pre_val != V0) + pop(pre_val); + + if (obj != noreg && obj != V0) + pop(obj); + + if(tosca_live) pop(V0); + + bind(done); +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + assert(tmp != AT, "must be"); + assert(tmp2 != AT, "must be"); + assert(thread == TREG, "must be"); + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + Label done; + Label runtime; + + // Does store cross heap regions? + xorr(AT, store_addr, new_val); + srli_d(AT, AT, HeapRegion::LogOfHRGrainBytes); + beqz(AT, done); + + + // crosses regions, storing NULL? + beq(new_val, R0, done); + + // storing region crossing non-NULL, is card already dirty? + const Register card_addr = tmp; + const Register cardtable = tmp2; + + move(card_addr, store_addr); + srli_d(card_addr, card_addr, CardTableModRefBS::card_shift); + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. + li(cardtable, (intptr_t)ct->byte_map_base); + add_d(card_addr, card_addr, cardtable); + + ld_b(AT, card_addr, 0); + addi_d(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val()); + beqz(AT, done); + + membar(StoreLoad); + ld_b(AT, card_addr, 0); + addi_d(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val()); + beqz(AT, done); + + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. 
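A compact sketch of the filtering g1_write_barrier_post() performs before dirtying a card; the types and the constants in main() are illustrative stand-ins, but the region-cross, NULL and card-value tests mirror the emitted code above.

#include <cstdint>

// Returns true only when the store really needs the card dirtied and logged.
static bool needs_card_mark(uintptr_t store_addr, uintptr_t new_val,
                            int log_region_bytes,   // HeapRegion::LogOfHRGrainBytes
                            int card_shift,         // CardTableModRefBS::card_shift
                            const int8_t* card_table_base,
                            int8_t young_card, int8_t dirty_card) {
  if (((store_addr ^ new_val) >> log_region_bytes) == 0) return false; // same region
  if (new_val == 0) return false;                                      // storing NULL
  const int8_t* card = card_table_base + (store_addr >> card_shift);
  if (*card == young_card) return false;   // young cards never need dirtying
  // the generated code re-reads the card after a StoreLoad fence at this point
  if (*card == dirty_card) return false;   // already dirty
  return true;
}

int main() {
  int8_t cards[1] = { 0 };
  // same-region store: filtered out before the card table is even consulted
  return needs_card_mark(0x1000, 0x1008, 19, 9, cards, 2, 1) ? 1 : 0;
}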
+ li(AT, (int)CardTableModRefBS::dirty_card_val()); + st_b(AT, card_addr, 0); + + ld_w(AT, queue_index); + beqz(AT, runtime); + addi_d(AT, AT, -1 * wordSize); + st_w(AT, queue_index); + ld_d(tmp2, buffer); + ld_d(AT, queue_index); + stx_d(card_addr, tmp2, AT); + b(done); + + bind(runtime); + // save the live input values + push(store_addr); + push(new_val); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG); + pop(new_val); + pop(store_addr); + + bind(done); +} + +#endif // INCLUDE_ALL_GCS +////////////////////////////////////////////////////////////////////////////////// + + +void MacroAssembler::store_check(Register obj) { + // Does a store check for the oop in register obj. The content of + // register obj is destroyed afterwards. + store_check_part_1(obj); + store_check_part_2(obj); +} + +void MacroAssembler::store_check(Register obj, Address dst) { + store_check(obj); +} + + +// split the store check operation so that other instructions can be scheduled inbetween +void MacroAssembler::store_check_part_1(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + srli_d(obj, obj, CardTableModRefBS::card_shift); +} + +void MacroAssembler::store_check_part_2(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + li(AT, (long)ct->byte_map_base); + add_d(AT, AT, obj); + if (UseConcMarkSweepGC) membar(StoreStore); + st_b(R0, AT, 0); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. +void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, + Register t1, Register t2, Label& slow_case) { + assert_different_registers(obj, t2); + assert_different_registers(obj, var_size_in_bytes); + + Register end = t2; + // verify_tlab(); + + ld_ptr(obj, Address(TREG, JavaThread::tlab_top_offset())); + if (var_size_in_bytes == noreg) { + lea(end, Address(obj, con_size_in_bytes)); + } else { + lea(end, Address(obj, var_size_in_bytes, Address::times_1, 0)); + } + + ld_ptr(SCR1, Address(TREG, JavaThread::tlab_end_offset())); + blt_far(SCR1, end, slow_case, false); + + // update the tlab top pointer + st_ptr(end, Address(TREG, JavaThread::tlab_top_offset())); + + // recover var_size_in_bytes if necessary + if (var_size_in_bytes == end) { + sub_d(var_size_in_bytes, var_size_in_bytes, obj); + } + // verify_tlab(); +} + +// Defines obj, preserves var_size_in_bytes +void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, + Register t1, Label& slow_case) { + assert_different_registers(obj, var_size_in_bytes, t1, AT); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. 
+ b_far(slow_case); + } else { + Register end = t1; + Register heap_end = SCR2; + Label retry; + bind(retry); + + li(SCR1, (address)Universe::heap()->end_addr()); + ld_d(heap_end, SCR1, 0); + + // Get the current top of the heap + li(SCR1, (address) Universe::heap()->top_addr()); + ll_d(obj, SCR1, 0); + + // Adjust it my the size of our new object + if (var_size_in_bytes == noreg) + addi_d(end, obj, con_size_in_bytes); + else + add_d(end, obj, var_size_in_bytes); + + // if end < obj then we wrapped around high memory + blt_far(end, obj, slow_case, false); + blt_far(heap_end, end, slow_case, false); + + // If heap top hasn't been changed by some other thread, update it. + sc_d(end, SCR1, 0); + beqz(end, retry); + + incr_allocated_bytes(TREG, var_size_in_bytes, con_size_in_bytes, t1); + } +} + +void MacroAssembler::incr_allocated_bytes(Register thread, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1) { + if (!thread->is_valid()) { +#ifndef OPT_THREAD + assert(t1->is_valid(), "need temp reg"); + thread = t1; + get_thread(thread); +#else + thread = TREG; +#endif + } + + ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); + if (var_size_in_bytes->is_valid()) { + add_d(AT, AT, var_size_in_bytes); + } else { + addi_d(AT, AT, con_size_in_bytes); + } + st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); +} + +static const double pi_4 = 0.7853981633974483; + +// must get argument(a double) in FA0/FA1 +//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { +//We need to preseve the register which maybe modified during the Call +void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { + // save all modified register here + // FIXME, in the disassembly of tirgfunc, only used V0, V1, T4, SP, RA, so we ony save V0, V1, T4 + guarantee(0, "LA not implemented yet"); +#if 0 + pushad(); + // we should preserve the stack space before we call + addi_d(SP, SP, -wordSize * 2); + switch (trig){ + case 's' : + call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); + break; + case 'c': + call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); + break; + case 't': + call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); + break; + default:assert (false, "bad intrinsic"); + break; + + } + + addi_d(SP, SP, wordSize * 2); + popad(); +#endif +} + +void MacroAssembler::li(Register rd, jlong value) { + jlong hi12 = bitfield(value, 52, 12); + jlong lo52 = bitfield(value, 0, 52); + + if (hi12 != 0 && lo52 == 0) { + lu52i_d(rd, R0, hi12); + } else { + jlong hi20 = bitfield(value, 32, 20); + jlong lo20 = bitfield(value, 12, 20); + jlong lo12 = bitfield(value, 0, 12); + + if (lo20 == 0) { + ori(rd, R0, lo12); + } else if (bitfield(simm12(lo12), 12, 20) == lo20) { + addi_w(rd, R0, simm12(lo12)); + } else { + lu12i_w(rd, lo20); + if (lo12 != 0) + ori(rd, rd, lo12); + } + if (hi20 != bitfield(simm20(lo20), 20, 20)) + lu32i_d(rd, hi20); + if (hi12 != bitfield(simm20(hi20), 20, 12)) + lu52i_d(rd, rd, hi12); + } +} + +void MacroAssembler::patchable_li52(Register rd, jlong value) { + int count = 0; + + if (value <= max_jint && value >= min_jint) { + if (is_simm(value, 12)) { + addi_d(rd, R0, value); + count++; + } else { + lu12i_w(rd, split_low20(value >> 12)); + count++; + if (split_low12(value)) { + ori(rd, rd, split_low12(value)); + count++; + } + } + } else if (is_simm(value, 52)) { + lu12i_w(rd, split_low20(value >> 12)); + count++; + if 
(split_low12(value)) { + ori(rd, rd, split_low12(value)); + count++; + } + lu32i_d(rd, split_low20(value >> 32)); + count++; + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 3) { + nop(); + count++; + } +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert(UseCompressedClassPointers, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + long narrowKlass = (long)Klass::encode_klass(k); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_li52(dst, narrowKlass); +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert(UseCompressedOops, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_li52(dst, oop_index); +} + +void MacroAssembler::lipc(Register rd, Label& L) { + if (L.is_bound()) { + jint offs = (target(L) - pc()) >> 2; + guarantee(is_simm(offs, 20), "Not signed 20-bit offset"); + pcaddi(rd, offs); + } else { + InstructionMark im(this); + L.add_patch_at(code(), locator()); + pcaddi(rd, 0); + } +} + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) return; + const char * b = NULL; + stringStream ss; + ss.print("verify_oop: %s: %s", reg->name(), s); + b = code_string(ss.as_string()); + pushad(); + move(A1, reg); + patchable_li52(A0, (long)b); + li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); + ld_d(T4, AT, 0); + jalr(T4); + popad(); +} + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +#if 0 + if (!VerifyOops) { + nop(); + return; + } + // Pass register number to verify_oop_subroutine + const char * b = NULL; + stringStream ss; + ss.print("verify_oop_addr: %s", s); + b = code_string(ss.as_string()); + + st_ptr(T0, SP, - wordSize); + st_ptr(T1, SP, - 2*wordSize); + st_ptr(RA, SP, - 3*wordSize); + st_ptr(A0, SP, - 4*wordSize); + st_ptr(A1, SP, - 5*wordSize); + st_ptr(AT, SP, - 6*wordSize); + st_ptr(T9, SP, - 7*wordSize); + ld_ptr(A1, addr); // addr may use SP, so load from it before change SP + addiu(SP, SP, - 7 * wordSize); + + patchable_li52(A0, (long)b); + // call indirectly to solve generation ordering problem + li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); + ld_ptr(T9, AT, 0); + jalr(T9); + delayed()->nop(); + ld_ptr(T0, SP, 6* wordSize); + ld_ptr(T1, SP, 5* wordSize); + ld_ptr(RA, SP, 4* wordSize); + ld_ptr(A0, SP, 3* wordSize); + ld_ptr(A1, SP, 2* wordSize); + ld_ptr(AT, SP, 1* wordSize); + ld_ptr(T9, SP, 0* wordSize); + addiu(SP, SP, 7 * wordSize); +#endif +} + +// used registers : T0, T1 +void MacroAssembler::verify_oop_subroutine() { + // RA: ra + // A0: char* error message + // A1: oop object to verify + Label exit, error; + // increment counter + li(T0, (long)StubRoutines::verify_oop_count_addr()); + ld_w(AT, T0, 0); + addi_d(AT, AT, 1); + st_w(AT, T0, 0); + + // make sure object is 'reasonable' + beq(A1, R0, exit); // if obj is NULL it is ok + + // Check if the oop is in the right area of memory + // const int oop_mask = Universe::verify_oop_mask(); + // const int oop_bits = 
Universe::verify_oop_bits(); + const uintptr_t oop_mask = Universe::verify_oop_mask(); + const uintptr_t oop_bits = Universe::verify_oop_bits(); + li(AT, oop_mask); + andr(T0, A1, AT); + li(AT, oop_bits); + bne(T0, AT, error); + + // make sure klass is 'reasonable' + // add for compressedoops + reinit_heapbase(); + // add for compressedoops + load_klass(T0, A1); + beq(T0, R0, error); // if klass is NULL it is broken + // return if everything seems ok + bind(exit); + + jr(RA); + + // handle errors + bind(error); + pushad(); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + popad(); + jr(RA); +} + +void MacroAssembler::verify_tlab(Register t1, Register t2) { +#ifdef ASSERT + assert_different_registers(t1, t2, AT); + if (UseTLAB && VerifyOops) { + Label next, ok; + + get_thread(t1); + + ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); + bgeu(t2, AT, next); + + stop("assert(top >= start)"); + + bind(next); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); + bgeu(AT, t2, ok); + + stop("assert(top <= end)"); + + bind(ok); + + } +#endif +} + +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + //TODO: LA + guarantee(0, "LA not implemented yet"); + return RegisterOrConstant(tmp); +} + +void MacroAssembler::hswap(Register reg) { + // TODO LA opt + //short + srli_w(AT, reg, 8); + slli_w(reg, reg, 24); + srai_w(reg, reg, 16); + orr(reg, reg, AT); +} + +void MacroAssembler::huswap(Register reg) { + // TODO LA opt + srli_d(AT, reg, 8); + slli_d(reg, reg, 24); + srli_d(reg, reg, 16); + orr(reg, reg, AT); + bstrpick_d(reg, reg, 15, 0); +} + +// something funny to do this will only one more register AT +// 32 bits +void MacroAssembler::swap(Register reg) { + //TODO: LA opt + srli_w(AT, reg, 8); + slli_w(reg, reg, 24); + orr(reg, reg, AT); + //reg : 4 1 2 3 + srli_w(AT, AT, 16); + xorr(AT, AT, reg); + andi(AT, AT, 0xff); + //AT : 0 0 0 1^3); + xorr(reg, reg, AT); + //reg : 4 1 2 1 + slli_w(AT, AT, 16); + xorr(reg, reg, AT); + //reg : 4 3 2 1 +} + +void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, + Register resflag, bool retold, bool barrier) { + assert(oldval != resflag, "oldval != resflag"); + assert(newval != resflag, "newval != resflag"); + Label again, succ, fail; + + bind(again); + ll_d(resflag, addr); + bne(resflag, oldval, fail); + move(resflag, newval); + sc_d(resflag, addr); + beqz(resflag, again); + b(succ); + + bind(fail); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); + bind(succ); +} + +void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, + Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { + assert(oldval != tmp, "oldval != tmp"); + assert(newval != tmp, "newval != tmp"); + Label again, neq; + + bind(again); + ll_d(tmp, addr); + bne(tmp, oldval, neq); + move(tmp, newval); + sc_d(tmp, addr); + beqz(tmp, again); + b(succ); + + bind(neq); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) + b(*fail); +} + +void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, + Register resflag, bool sign, bool retold, bool barrier) { + assert(oldval != resflag, "oldval != resflag"); + assert(newval != resflag, "newval != resflag"); + Label again, succ, fail; + + bind(again); + ll_w(resflag, addr); + if (!sign) + lu32i_d(resflag, 0); + 
bne(resflag, oldval, fail); + move(resflag, newval); + sc_w(resflag, addr); + beqz(resflag, again); + b(succ); + + bind(fail); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, resflag); + move(resflag, R0); + bind(succ); +} + +void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, + bool sign, bool retold, bool barrier, Label& succ, Label* fail) { + assert(oldval != tmp, "oldval != tmp"); + assert(newval != tmp, "newval != tmp"); + Label again, neq; + + bind(again); + ll_w(tmp, addr); + if (!sign) + lu32i_d(tmp, 0); + bne(tmp, oldval, neq); + move(tmp, newval); + sc_w(tmp, addr); + beqz(tmp, again); + b(succ); + + bind(neq); + if (barrier) + dbar(0x700); + if (retold && oldval != R0) + move(oldval, tmp); + if (fail) + b(*fail); +} + +// be sure the three register is different +void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// be sure the three register is different +void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// Fast_Lock and Fast_Unlock used by C2 + +// Because the transitions from emitted code to the runtime +// monitorenter/exit helper stubs are so slow it's critical that +// we inline both the stack-locking fast-path and the inflated fast path. +// +// See also: cmpFastLock and cmpFastUnlock. +// +// What follows is a specialized inline transliteration of the code +// in slow_enter() and slow_exit(). If we're concerned about I$ bloat +// another option would be to emit TrySlowEnter and TrySlowExit methods +// at startup-time. These methods would accept arguments as +// (Obj, Self, box, Scratch) and return success-failure +// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply +// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. +// In practice, however, the # of lock sites is bounded and is usually small. +// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer +// if the processor uses simple bimodal branch predictors keyed by EIP +// Since the helper routines would be called from multiple synchronization +// sites. +// +// An even better approach would be write "MonitorEnter()" and "MonitorExit()" +// in java - using j.u.c and unsafe - and just bind the lock and unlock sites +// to those specialized methods. That'd give us a mostly platform-independent +// implementation that the JITs could optimize and inline at their pleasure. +// Done correctly, the only time we'd need to cross to native could would be +// to park() or unpark() threads. We'd also need a few more unsafe operators +// to (a) prevent compiler-JIT reordering of non-volatile accesses, and +// (b) explicit barriers or fence operations. +// +// TODO: +// +// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). +// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. +// Given TLAB allocation, Self is usually manifested in a register, so passing it into +// the lock operators would typically be faster than reifying Self. +// +// * Ideally I'd define the primitives as: +// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. +// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED +// Unfortunately ADLC bugs prevent us from expressing the ideal form. 
+// Instead, we're stuck with a rather awkward and brittle register assignments below. +// Furthermore the register assignments are overconstrained, possibly resulting in +// sub-optimal code near the synchronization site. +// +// * Eliminate the sp-proximity tests and just use "== Self" tests instead. +// Alternately, use a better sp-proximity test. +// +// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. +// Either one is sufficient to uniquely identify a thread. +// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. +// +// * Intrinsify notify() and notifyAll() for the common cases where the +// object is locked by the calling thread but the waitlist is empty. +// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). +// +// * use jccb and jmpb instead of jcc and jmp to improve code density. +// But beware of excessive branch density on AMD Opterons. +// +// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success +// or failure of the fast-path. If the fast-path fails then we pass +// control to the slow-path, typically in C. In Fast_Lock and +// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 +// will emit a conditional branch immediately after the node. +// So we have branches to branches and lots of ICC.ZF games. +// Instead, it might be better to have C2 pass a "FailureLabel" +// into Fast_Lock and Fast_Unlock. In the case of success, control +// will drop through the node. ICC.ZF is undefined at exit. +// In the case of failure, the node will branch directly to the +// FailureLabel + +// obj: object to lock +// box: on-stack box address (displaced header location) +// tmp: tmp -- KILLED +// scr: tmp -- KILLED +void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, + Register tmpReg, Register scrReg) { + Label IsInflated, DONE, DONE_SET; + + // Ensure the register assignents are disjoint + guarantee(objReg != boxReg, ""); + guarantee(objReg != tmpReg, ""); + guarantee(objReg != scrReg, ""); + guarantee(boxReg != tmpReg, ""); + guarantee(boxReg != scrReg, ""); + + block_comment("FastLock"); + + if (PrintBiasedLockingStatistics) { + atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); + } + + if (EmitSync & 1) { + move(AT, R0); + return; + } else + if (EmitSync & 2) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. 
+ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); + } + + ld_d(tmpReg, Address(objReg, 0)) ; // fetch markword + ori(tmpReg, tmpReg, 0x1); + st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg + + // Recursive locking + sub_d(tmpReg, tmpReg, SP); + li(AT, (7 - os::vm_page_size() )); + andr(tmpReg, tmpReg, AT); + st_d(tmpReg, Address(boxReg, 0)); + bind(DONE_LABEL) ; + } else { + // Possible cases that we'll encounter in fast_lock + // ------------------------------------------------ + // * Inflated + // -- unlocked + // -- Locked + // = by self + // = by other + // * biased + // -- by Self + // -- by other + // * neutral + // * stack-locked + // -- by self + // = sp-proximity test hits + // = sp-proximity test generates false-negative + // -- by other + // + + // TODO: optimize away redundant LDs of obj->mark and improve the markword triage + // order to reduce the number of conditional branches in the most common cases. + // Beware -- there's a subtle invariant that fetch of the markword + // at [FETCH], below, will never observe a biased encoding (*101b). + // If this invariant is not held we risk exclusion (safety) failure. + if (UseBiasedLocking && !UseOptoBiasInlining) { + Label succ, fail; + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); + b(fail); + bind(succ); + li(resReg, 1); + b(DONE); + bind(fail); + } + + ld_d(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. + andi(AT, tmpReg, markOopDesc::monitor_value); + bnez(AT, IsInflated); // inflated vs stack-locked|neutral|bias + + // Attempt stack-locking ... + ori(tmpReg, tmpReg, markOopDesc::unlocked_value); + st_d(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + + if (PrintBiasedLockingStatistics) { + Label SUCC, FAIL; + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg + bind(SUCC); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); + li(resReg, 1); + b(DONE); + bind(FAIL); + } else { + // If cmpxchg is succ, then scrReg = 1 + cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg + } + + // Recursive locking + // The object is stack-locked: markword contains stack pointer to BasicLock. + // Locked by current thread if difference with current SP is less than one page. + sub_d(tmpReg, tmpReg, SP); + li(AT, 7 - os::vm_page_size()); + andr(tmpReg, tmpReg, AT); + st_d(tmpReg, Address(boxReg, 0)); + + if (PrintBiasedLockingStatistics) { + Label L; + // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ + bnez(tmpReg, L); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); + bind(L); + } + + sltui(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 + b(DONE); + + bind(IsInflated); + // The object's monitor m is unlocked iff m->owner == NULL, + // otherwise m->owner may contain a thread or a stack address. + + // TODO: someday avoid the ST-before-CAS penalty by + // relocating (deferring) the following ST. + // We should also think about trying a CAS without having + // fetched _owner. If the CAS is successful we may + // avoid an RTO->RTS upgrade on the $line. 
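For reference, the recursive stack-lock test above (sub_d from SP, then andr with 7 - os::vm_page_size()) reduces to the predicate sketched below; page_size is a plain parameter here rather than the real os::vm_page_size().

#include <cstdint>

// True when the displaced markword already points into the current thread's
// stack close to SP, i.e. the lock is held recursively by this thread.
static bool recursively_locked(uintptr_t mark, uintptr_t sp, uintptr_t page_size) {
  // 7 - page_size == (~(page_size - 1) | 7) for a power-of-two page size
  uintptr_t mask = ~(page_size - 1) | 7;
  return ((mark - sp) & mask) == 0;   // within one page above SP and 8-byte aligned
}

int main() {
  return recursively_locked(0x7ffff000 + 0x40, 0x7ffff000, 4096) ? 0 : 1;
}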
+ // Without cast to int32_t a movptr will destroy r10 which is typically obj + li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); + st_d(AT, Address(boxReg, 0)); + + ld_d(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + // if (m->owner != 0) => AT = 0, goto slow path. + move(scrReg, R0); + bnez(AT, DONE_SET); + +#ifndef OPT_THREAD + get_thread(TREG) ; +#endif + // It's inflated and appears unlocked + addi_d(tmpReg, tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2); + cmpxchg(Address(tmpReg, 0), R0, TREG, scrReg, false, false); + // Intentional fall-through into DONE ... + + bind(DONE_SET); + move(resReg, scrReg); + + // DONE is a hot target - we'd really like to place it at the + // start of cache line by padding with NOPs. + // See the AMD and Intel software optimization manuals for the + // most efficient "long" NOP encodings. + // Unfortunately none of our alignment mechanisms suffice. + bind(DONE); + // At DONE the resReg is set as follows ... + // Fast_Unlock uses the same protocol. + // resReg == 1 -> Success + // resREg == 0 -> Failure - force control through the slow-path + + // Avoid branch-to-branch on AMD processors + // This appears to be superstition. + if (EmitSync & 32) nop() ; + + } +} + +// obj: object to unlock +// box: box address (displaced header location), killed. +// tmp: killed tmp; cannot be obj nor box. +// +// Some commentary on balanced locking: +// +// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. +// Methods that don't have provably balanced locking are forced to run in the +// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. +// The interpreter provides two properties: +// I1: At return-time the interpreter automatically and quietly unlocks any +// objects acquired the current activation (frame). Recall that the +// interpreter maintains an on-stack list of locks currently held by +// a frame. +// I2: If a method attempts to unlock an object that is not held by the +// the frame the interpreter throws IMSX. +// +// Lets say A(), which has provably balanced locking, acquires O and then calls B(). +// B() doesn't have provably balanced locking so it runs in the interpreter. +// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O +// is still locked by A(). +// +// The only other source of unbalanced locking would be JNI. The "Java Native Interface: +// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter +// should not be unlocked by "normal" java-level locking and vice-versa. The specification +// doesn't specify what will occur if a program engages in such mixed-mode locking, however. + +void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, + Register tmpReg, Register scrReg) { + Label DONE, DONE_SET, Stacked, Inflated; + + guarantee(objReg != boxReg, ""); + guarantee(objReg != tmpReg, ""); + guarantee(objReg != scrReg, ""); + guarantee(boxReg != tmpReg, ""); + guarantee(boxReg != scrReg, ""); + + block_comment("FastUnlock"); + + if (EmitSync & 4) { + // Disable - inhibit all inlining. Force control through the slow-path + move(AT, R0); + return; + } else + if (EmitSync & 8) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + // classic stack-locking code ... 
+ ld_d(tmpReg, Address(boxReg, 0)) ; + assert_different_registers(AT, tmpReg); + li(AT, 0x1); + beq(tmpReg, R0, DONE_LABEL) ; + + cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); + bind(DONE_LABEL); + } else { + Label CheckSucc; + + // Critically, the biased locking test must have precedence over + // and appear before the (box->dhw == 0) recursive stack-lock test. + if (UseBiasedLocking && !UseOptoBiasInlining) { + Label succ, fail; + biased_locking_exit(objReg, tmpReg, succ); + b(fail); + bind(succ); + li(resReg, 1); + b(DONE); + bind(fail); + } + + ld_d(tmpReg, Address(boxReg, 0)); // Examine the displaced header + sltui(AT, tmpReg, 1); + beqz(tmpReg, DONE_SET); // 0 indicates recursive stack-lock + + ld_d(tmpReg, Address(objReg, 0)); // Examine the object's markword + andi(AT, tmpReg, markOopDesc::monitor_value); + beqz(AT, Stacked); // Inflated? + + bind(Inflated); + // It's inflated. + // Despite our balanced locking property we still check that m->_owner == Self + // as java routines or native JNI code called by this thread might + // have released the lock. + // Refer to the comments in synchronizer.cpp for how we might encode extra + // state in _succ so we can avoid fetching EntryList|cxq. + // + // I'd like to add more cases in fast_lock() and fast_unlock() -- + // such as recursive enter and exit -- but we have to be wary of + // I$ bloat, T$ effects and BP$ effects. + // + // If there's no contention try a 1-0 exit. That is, exit without + // a costly MEMBAR or CAS. See synchronizer.cpp for details on how + // we detect and recover from the race that the 1-0 exit admits. + // + // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier + // before it STs null into _owner, releasing the lock. Updates + // to data protected by the critical section must be visible before + // we drop the lock (and thus before any other thread could acquire + // the lock and observe the fields protected by the lock). 
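The 1-0 inflated exit that the following code emits can be summarized by this sketch; ObjectMonitor below only models the fields the fast path touches and is not the VM's real layout.

#include <atomic>

struct ObjectMonitor {          // reduced model of the fields used here
  std::atomic<void*> owner;
  long               recursions;  // intptr_t in the VM
  void*              cxq;
  void*              EntryList;
};

// Returns false when the runtime slow path must complete the unlock.
static bool fast_exit(ObjectMonitor* m, void* self) {
  if (m->owner.load(std::memory_order_relaxed) != self) return false;
  if (m->recursions != 0) return false;
  if (m->cxq != nullptr || m->EntryList != nullptr) return false;  // waiters exist
  m->owner.store(nullptr, std::memory_order_release);              // the 1-0 exit
  return true;
}

int main() {
  int self;
  ObjectMonitor m;
  m.owner.store(&self);
  m.recursions = 0;
  m.cxq = nullptr;
  m.EntryList = nullptr;
  return fast_exit(&m, &self) ? 0 : 1;
}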
+#ifndef OPT_THREAD + get_thread(TREG); +#endif + + // It's inflated + ld_d(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + xorr(scrReg, scrReg, TREG); + + ld_d(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)); + orr(scrReg, scrReg, AT); + + move(AT, R0); + bnez(scrReg, DONE_SET); + + ld_d(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); + ld_d(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); + orr(scrReg, scrReg, AT); + + move(AT, R0); + bnez(scrReg, DONE_SET); + + membar(Assembler::Membar_mask_bits(LoadStore|StoreStore)); // release-store + st_d(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); + li(resReg, 1); + b(DONE); + + bind(Stacked); + ld_d(tmpReg, Address(boxReg, 0)); + cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); + + bind(DONE_SET); + move(resReg, AT); + + if (EmitSync & 65536) { + bind (CheckSucc); + } + + bind(DONE); + + // Avoid branch to branch on AMD processors + if (EmitSync & 32768) { nop() ; } + } +} + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + + +void MacroAssembler::verify_FPU(int stack_depth, const char* s) { + //Unimplemented(); +} + +Register caller_saved_registers[] = {T7, T5, T6, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; +Register caller_saved_registers_except_v0[] = {T7, T5, T6, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T4, S8, RA, FP}; + + //TODO: LA +//In LA, F0~23 are all caller-saved registers +FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; + +// We preserve all caller-saved register +void MacroAssembler::pushad(){ + int i; + // Fixed-point registers + int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + st_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } +}; + +void MacroAssembler::popad(){ + int i; + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) + { + fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + for (i = 0; i < len; i++) + { + ld_d(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); +}; + +// We preserve all caller-saved register except V0 +void MacroAssembler::pushad_except_v0() { + int i; + // Fixed-point registers + int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + st_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + addi_d(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + fst_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } +} + +void MacroAssembler::popad_except_v0() { + int i; + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / 
sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) { + fld_d(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + for (i = 0; i < len; i++) { + ld_d(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + addi_d(SP, SP, len * wordSize); +} + +void MacroAssembler::push2(Register reg1, Register reg2) { + addi_d(SP, SP, -16); + st_d(reg1, SP, 8); + st_d(reg2, SP, 0); +} + +void MacroAssembler::pop2(Register reg1, Register reg2) { + ld_d(reg1, SP, 8); + ld_d(reg2, SP, 0); + addi_d(SP, SP, 16); +} + +// for UseCompressedOops Option +void MacroAssembler::load_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + ld_wu(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else { + ld_d(dst, src, oopDesc::klass_offset_in_bytes()); + } +} + +void MacroAssembler::store_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + encode_klass_not_null(src); + st_w(src, dst, oopDesc::klass_offset_in_bytes()); + } else { + st_d(src, dst, oopDesc::klass_offset_in_bytes()); + } +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld_d(dst, Address(dst, Klass::prototype_header_offset())); +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedClassPointers) { + st_w(src, dst, oopDesc::klass_gap_offset_in_bytes()); + } +} + +void MacroAssembler::load_heap_oop(Register dst, Address src) { + if(UseCompressedOops){ + ld_wu(dst, src); + decode_heap_oop(dst); + } else { + ld_d(dst, src); + } +} + +void MacroAssembler::store_heap_oop(Address dst, Register src){ + if(UseCompressedOops){ + assert(!dst.uses(src), "not enough registers"); + encode_heap_oop(src); + st_w(src, dst); + } else { + st_d(src, dst); + } +} + +void MacroAssembler::store_heap_oop_null(Address dst){ + if(UseCompressedOops){ + st_w(R0, dst); + } else { + st_d(R0, dst); + } +} + +#ifdef ASSERT +void MacroAssembler::verify_heapbase(const char* msg) { + assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); +} +#endif + +// Algorithm must match oop.inline.hpp encode_heap_oop. 
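As a reference for the compressed-oop helpers that follow, the arithmetic is essentially the sketch below; base and shift stand in for Universe::narrow_oop_base() and Universe::narrow_oop_shift(), and keeping NULL at 0 is what the maskeqz instructions achieve in the generated code.

#include <cstdint>

static uint32_t encode_oop(uint64_t oop, uint64_t base, int shift) {
  if (oop == 0) return 0;                          // NULL stays NULL
  return (uint32_t)((oop - base) >> shift);
}

static uint64_t decode_oop(uint32_t narrow, uint64_t base, int shift) {
  if (narrow == 0) return 0;                       // 0 stays NULL
  return base + ((uint64_t)narrow << shift);       // alsl_d / slli_d + add_d
}

int main() {
  const uint64_t base = 0x100000000ULL;            // assumed heap base
  const int shift = 3;                             // LogMinObjAlignmentInBytes
  uint64_t oop = base + 0x12340;
  return decode_oop(encode_oop(oop, base, shift), base, shift) == oop ? 0 : 1;
}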
+void MacroAssembler::encode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(r, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } + return; + } + + sub_d(AT, r, S5_heapbase); + maskeqz(r, AT, r); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(src, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + srli_d(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) { + move(dst, src); + } + } + return; + } + + sub_d(AT, src, S5_heapbase); + maskeqz(dst, AT, src); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register r) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(r, R0, ok); + stop("null oop passed to encode_heap_oop_not_null"); + bind(ok); + } +#endif + verify_oop(r, "broken oop in encode_heap_oop_not_null"); + if (Universe::narrow_oop_base() != NULL) { + sub_d(r, r, S5_heapbase); + } + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(src, R0, ok); + stop("null oop passed to encode_heap_oop_not_null2"); + bind(ok); + } +#endif + verify_oop(src, "broken oop in encode_heap_oop_not_null2"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + srli_d(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) { + move(dst, src); + } + } + return; + } + sub_d(dst, src, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::decode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + } + return; + } + + move(AT, r); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + shl(r, LogMinObjAlignmentInBytes); + add_d(r, r, S5_heapbase); + } + 
} else { + add_d(r, r, S5_heapbase); + } + maskeqz(r, r, AT); + verify_oop(r, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + slli_d(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) { + move(dst, src); + } + } + return; + } + + Register cond; + if (dst == src) { + cond = AT; + move(cond, src); + } else { + cond = src; + } + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + slli_d(dst, src, LogMinObjAlignmentInBytes); + add_d(dst, dst, S5_heapbase); + } + } else { + add_d(dst, src, S5_heapbase); + } + maskeqz(dst, dst, cond); + verify_oop(dst, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + // Note: it will change flags + assert(UseCompressedOops, "should only be used for compressed headers"); + assert(Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (Universe::narrow_oop_base() != NULL) { + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(r, r, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + shl(r, LogMinObjAlignmentInBytes); + add_d(r, r, S5_heapbase); + } + } else { + shl(r, LogMinObjAlignmentInBytes); + } + } else { + assert(Universe::narrow_oop_base() == NULL, "sanity"); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + assert(UseCompressedOops, "should only be used for compressed headers"); + assert(Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+ if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (Universe::narrow_oop_base() != NULL) { + if (LogMinObjAlignmentInBytes <= 4) { + alsl_d(dst, src, S5_heapbase, LogMinObjAlignmentInBytes - 1); + } else { + slli_d(dst, src, LogMinObjAlignmentInBytes); + add_d(dst, dst, S5_heapbase); + } + } else { + slli_d(dst, src, LogMinObjAlignmentInBytes); + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + if (dst != src) { + move(dst, src); + } + } +} + +void MacroAssembler::encode_klass_not_null(Register r) { + if (Universe::narrow_klass_base() != NULL) { + assert(r != AT, "Encoding a klass in AT"); + li(AT, (int64_t)Universe::narrow_klass_base()); + sub_d(r, r, AT); + } + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(r, LogKlassAlignmentInBytes); + } +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + if (dst == src) { + encode_klass_not_null(src); + } else { + if (Universe::narrow_klass_base() != NULL) { + li(dst, (int64_t)Universe::narrow_klass_base()); + sub_d(dst, src, dst); + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(dst, LogKlassAlignmentInBytes); + } + } else { + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + srli_d(dst, src, LogKlassAlignmentInBytes); + } else { + move(dst, src); + } + } + } +} + +// Function instr_size_for_decode_klass_not_null() counts the instructions +// generated by decode_klass_not_null(register r) and reinit_heapbase(), +// when (Universe::heap() != NULL). Hence, if the instructions they +// generate change, then this method needs to be updated. +int MacroAssembler::instr_size_for_decode_klass_not_null() { + assert (UseCompressedClassPointers, "only for compressed klass ptrs"); + if (Universe::narrow_klass_base() != NULL) { + // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). + return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); + } else { + // longest load decode klass function, mov64, leaq + return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1); + } +} + +void MacroAssembler::decode_klass_not_null(Register r) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + assert(r != AT, "Decoding a klass in AT"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shl(r, LogKlassAlignmentInBytes); + } + if (Universe::narrow_klass_base() != NULL) { + li(AT, (int64_t)Universe::narrow_klass_base()); + add_d(r, r, AT); + } +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { + assert(UseCompressedClassPointers, "should only be used for compressed headers"); + if (dst == src) { + decode_klass_not_null(dst); + } else { + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
+ li(dst, (int64_t)Universe::narrow_klass_base()); + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + alsl_d(dst, src, dst, Address::times_8 - 1); + } else { + add_d(dst, src, dst); + } + } +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops || UseCompressedClassPointers) { + if (Universe::heap() != NULL) { + if (Universe::narrow_oop_base() == NULL) { + move(S5_heapbase, R0); + } else { + li(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); + } + } else { + li(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); + ld_d(S5_heapbase, S5_heapbase, 0); + } + } +} + +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { +//implement ind gen_subtype_check + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + +SkipIfEqual::SkipIfEqual( + MacroAssembler* masm, const bool* flag_addr, bool value) { + _masm = masm; + _masm->li(AT, (address)flag_addr); + _masm->ld_b(AT, AT, 0); + _masm->addi_d(AT, AT, -value); + _masm->beq(AT, R0, _label); +} + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + beq(sub_klass, super_klass, *L_success); + // Check the supertype display: + if (must_load_sco) { + ld_wu(temp_reg, super_klass, sco_offset); + super_check_offset = RegisterOrConstant(temp_reg); + } + add_d(AT, sub_klass, super_check_offset.register_or_noreg()); + ld_d(AT, AT, super_check_offset.constant_or_zero()); + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) 
+ // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + beq(super_klass, AT, *L_success); + addi_d(AT, super_check_offset.as_register(), -sc_offset); + if (L_failure == &L_fallthrough) { + beq(AT, R0, *L_slow_path); + } else { + bne_far(AT, R0, *L_failure); + b(*L_slow_path); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + beq(super_klass, AT, *L_success); + } else { + bne(super_klass, AT, *L_slow_path); + b(*L_success); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + beq(super_klass, AT, *L_success); + } else { + bne_far(super_klass, AT, *L_failure); + b(*L_success); + } + } + + bind(L_fallthrough); +} + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + if (temp2_reg == noreg) + temp2_reg = TSR; + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); + Address super_cache_addr( sub_klass, sc_offset); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + // The repne_scan instruction uses fixed registers, which we must spill. + // Don't worry too much about pre-existing connections with the input regs. + +#ifndef PRODUCT + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; + ExternalAddress pst_counter_addr((address) pst_counter); +#endif //PRODUCT + + // We will consult the secondary-super array. + ld_d(temp_reg, secondary_supers_addr); + // Load the array length. + ld_w(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); + // Skip to start of data. + addi_d(temp_reg, temp_reg, Array::base_offset_in_bytes()); + + Label Loop, subtype; + bind(Loop); + beq(temp2_reg, R0, *L_failure); + ld_d(AT, temp_reg, 0); + addi_d(temp_reg, temp_reg, 1 * wordSize); + beq(AT, super_klass, subtype); + addi_d(temp2_reg, temp2_reg, -1); + b(Loop); + + bind(subtype); + st_d(super_klass, super_cache_addr); + if (L_success != &L_fallthrough) { + b(*L_success); + } + + // Success. Cache the super we found and proceed in triumph. 
+#undef IS_A_TEMP
+
+  bind(L_fallthrough);
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
+  ld_d(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
+  st_d(R0, Address(java_thread, JavaThread::vm_result_offset()));
+  verify_oop(oop_result, "broken oop in call_VM_base");
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
+  ld_d(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
+  st_d(R0, Address(java_thread, JavaThread::vm_result_2_offset()));
+}
+
+Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
+                                         int extra_slot_offset) {
+  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
+  int stackElementSize = Interpreter::stackElementSize;
+  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
+#ifdef ASSERT
+  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
+  assert(offset1 - offset == stackElementSize, "correct arithmetic");
+#endif
+  Register scale_reg = NOREG;
+  Address::ScaleFactor scale_factor = Address::no_scale;
+  if (arg_slot.is_constant()) {
+    offset += arg_slot.as_constant() * stackElementSize;
+  } else {
+    scale_reg    = arg_slot.as_register();
+    scale_factor = Address::times_8;
+  }
+  // We don't push RA on stack in prepare_invoke.
+  // offset += wordSize; // return PC is on stack
+  if (scale_reg == NOREG) {
+    return Address(SP, offset);
+  } else {
+    alsl_d(scale_reg, scale_reg, SP, scale_factor - 1);
+    return Address(scale_reg, offset);
+  }
+}
+
+SkipIfEqual::~SkipIfEqual() {
+  _masm->bind(_label);
+}
+
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
+  switch (size_in_bytes) {
+  case 8: ld_d(dst, src); break;
+  case 4: ld_w(dst, src); break;
+  case 2: is_signed ? ld_h(dst, src) : ld_hu(dst, src); break;
+  case 1: is_signed ? ld_b(dst, src) : ld_bu(dst, src); break;
+  default: ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
+  switch (size_in_bytes) {
+  case 8: st_d(src, dst); break;
+  case 4: st_w(src, dst); break;
+  case 2: st_h(src, dst); break;
+  case 1: st_b(src, dst); break;
+  default: ShouldNotReachHere();
+  }
+}
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
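+// The itable is scanned linearly starting just past the vtable; scan_temp walks
+// itableOffsetEntry records until intf_klass is found or a null entry ends the search.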
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Label& L_no_such_interface,
+                                             bool return_method) {
+  assert_different_registers(recv_klass, intf_klass, scan_temp, AT);
+  assert_different_registers(method_result, intf_klass, scan_temp, AT);
+  assert(recv_klass != method_result || !return_method,
+         "recv_klass can be destroyed when method isn't needed");
+
+  assert(itable_index.is_constant() || itable_index.as_register() == method_result,
+         "caller must use same register for non-constant itable index as for method");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = InstanceKlass::vtable_start_offset() * wordSize;
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int scan_step   = itableOffsetEntry::size() * wordSize;
+  int vte_size    = vtableEntry::size() * wordSize;
+  Address::ScaleFactor times_vte_scale = Address::times_ptr;
+  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+  ld_w(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize));
+
+  // %%% Could store the aligned, prescaled offset in the klassoop.
+  alsl_d(scan_temp, scan_temp, recv_klass, times_vte_scale - 1);
+  addi_d(scan_temp, scan_temp, vtable_base);
+  if (HeapWordsPerLong > 1) {
+    // Round up to align_object_offset boundary
+    // see code for InstanceKlass::start_of_itable!
+    round_to(scan_temp, BytesPerLong);
+  }
+
+  if (return_method) {
+    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+    if (itable_index.is_constant()) {
+      li(AT, (int)itable_index.as_constant());
+      alsl_d(AT, AT, recv_klass, (int)Address::times_ptr - 1);
+    } else {
+      alsl_d(AT, itable_index.as_register(), recv_klass, (int)Address::times_ptr - 1);
+    }
+    addi_d(recv_klass, AT, itentry_off);
+  }
+
+  Label search, found_method;
+
+  for (int peel = 1; peel >= 0; peel--) {
+    ld_d(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
+
+    if (peel) {
+      beq(intf_klass, method_result, found_method);
+    } else {
+      bne(intf_klass, method_result, search);
+      // (invert the test to fall through to found_method...)
+    }
+
+    if (!peel) break;
+
+    bind(search);
+
+    // Check that the previous entry is non-null. A null entry means that
+    // the receiver class doesn't implement the interface, and wasn't the
+    // same as when the caller was compiled.
+    beq(method_result, R0, L_no_such_interface);
+    addi_d(scan_temp, scan_temp, scan_step);
+  }
+
+  bind(found_method);
+
+  if (return_method) {
+    // Got a hit.
+ ld_w(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + ldx_d(method_result, recv_klass, scan_temp); + } +} + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + Register tmp = S8; + push(tmp); + + if (vtable_index.is_constant()) { + assert_different_registers(recv_klass, method_result, tmp); + } else { + assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); + } + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); + if (vtable_index.is_constant()) { + li(AT, vtable_index.as_constant()); + slli_d(AT, AT, (int)Address::times_ptr); + } else { + slli_d(AT, vtable_index.as_register(), (int)Address::times_ptr); + } + li(tmp, base + vtableEntry::method_offset_in_bytes()); + add_d(tmp, tmp, AT); + add_d(tmp, tmp, recv_klass); + ld_d(method_result, tmp, 0); + + pop(tmp); +} + +void MacroAssembler::load_byte_map_base(Register reg) { + jbyte *byte_map_base = + ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base; + + // Strictly speaking the byte_map_base isn't an address at all, and it might + // even be negative. It is thus materialised as a constant. + li(reg, (uint64_t)byte_map_base); +} + +void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { + const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); + STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code + // The inverted mask is sign-extended + li(AT, inverted_jweak_mask); + andr(possibly_jweak, AT, possibly_jweak); +} + +void MacroAssembler::resolve_jobject(Register value, + Register thread, + Register tmp) { + assert_different_registers(value, thread, tmp); + Label done, not_weak; + beq(value, R0, done); // Use NULL as-is. + li(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. + andr(AT, value, AT); + beq(AT, R0, not_weak); + // Resolve jweak. + ld_d(value, value, -JNIHandles::weak_tag_value); + verify_oop(value); + #if INCLUDE_ALL_GCS + if (UseG1GC) { + g1_write_barrier_pre(noreg /* obj */, + value /* pre_val */, + thread /* thread */, + tmp /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } + #endif // INCLUDE_ALL_GCS + b(done); + bind(not_weak); + // Resolve (untagged) jobject. 
+ ld_d(value, value, 0); + verify_oop(value); + bind(done); +} + +void MacroAssembler::lea(Register rd, Address src) { + Register dst = rd; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index == noreg) { + if (is_simm(disp, 12)) { + addi_d(dst, base, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + add_d(dst, base, AT); + } + } else { + if (scale == 0) { + if (is_simm(disp, 12)) { + add_d(AT, base, index); + addi_d(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + add_d(AT, base, AT); + add_d(dst, AT, index); + } + } else { + if (is_simm(disp, 12)) { + alsl_d(AT, index, base, scale - 1); + addi_d(dst, AT, disp); + } else { + lu12i_w(AT, split_low20(disp >> 12)); + if (split_low12(disp)) + ori(AT, AT, split_low12(disp)); + add_d(AT, AT, base); + alsl_d(dst, index, AT, scale - 1); + } + } + } +} + +void MacroAssembler::lea(Register dst, AddressLiteral adr) { + code_section()->relocate(pc(), adr.rspec()); + pcaddi(dst, (adr.target() - pc()) >> 2); +} + +int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { + int v = (dest_pos - inst_pos) >> 2; + switch(high(inst, 6)) { + case beq_op: + case bne_op: + case blt_op: + case bge_op: + case bltu_op: + case bgeu_op: + assert(is_simm16(v), "must be simm16"); +#ifndef PRODUCT + if(!is_simm16(v)) + { + tty->print_cr("must be simm16"); + tty->print_cr("Inst: %x", inst); + } +#endif + + inst &= 0xfc0003ff; + inst |= ((v & 0xffff) << 10); + break; + case beqz_op: + case bnez_op: + case bccondz_op: + assert(is_simm(v, 21), "must be simm21"); +#ifndef PRODUCT + if(!is_simm(v, 21)) + { + tty->print_cr("must be simm21"); + tty->print_cr("Inst: %x", inst); + } +#endif + + inst &= 0xfc0003e0; + inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x1f) ); + break; + case b_op: + case bl_op: + assert(is_simm(v, 26), "must be simm26"); +#ifndef PRODUCT + if(!is_simm(v, 26)) + { + tty->print_cr("must be simm26"); + tty->print_cr("Inst: %x", inst); + } +#endif + + inst &= 0xfc000000; + inst |= ( ((v & 0xffff) << 10) | ((v >> 16) & 0x3ff) ); + break; + default: + ShouldNotReachHere(); + break; + } + return inst; +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp, + bool is_signed) { + switch (cmp) { + case EQ: + sub_d(AT, op1, op2); + maskeqz(dst, dst, AT); + masknez(AT, src, AT); + break; + + case NE: + sub_d(AT, op1, op2); + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + break; + + case GT: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + break; + + case GE: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + maskeqz(dst, dst, AT); + masknez(AT, src, AT); + break; + + case LT: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + masknez(dst, dst, AT); + maskeqz(AT, src, AT); + break; + + case LE: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + maskeqz(dst, dst, AT); + masknez(AT, src, AT); + break; + + default: + Unimplemented(); + } + OR(dst, dst, AT); +} + + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp, + bool is_float) { + movgr2fr_d(tmp1, dst); + movgr2fr_d(tmp2, src); + + switch(cmp) 
{ + case EQ: + if (is_float) { + fcmp_ceq_s(FCC0, op1, op2); + } else { + fcmp_ceq_d(FCC0, op1, op2); + } + fsel(tmp1, tmp1, tmp2, FCC0); + break; + + case NE: + if (is_float) { + fcmp_ceq_s(FCC0, op1, op2); + } else { + fcmp_ceq_d(FCC0, op1, op2); + } + fsel(tmp1, tmp2, tmp1, FCC0); + break; + + case GT: + if (is_float) { + fcmp_cule_s(FCC0, op1, op2); + } else { + fcmp_cule_d(FCC0, op1, op2); + } + fsel(tmp1, tmp2, tmp1, FCC0); + break; + + case GE: + if (is_float) { + fcmp_cult_s(FCC0, op1, op2); + } else { + fcmp_cult_d(FCC0, op1, op2); + } + fsel(tmp1, tmp2, tmp1, FCC0); + break; + + case LT: + if (is_float) { + fcmp_cult_s(FCC0, op1, op2); + } else { + fcmp_cult_d(FCC0, op1, op2); + } + fsel(tmp1, tmp1, tmp2, FCC0); + break; + + case LE: + if (is_float) { + fcmp_cule_s(FCC0, op1, op2); + } else { + fcmp_cule_d(FCC0, op1, op2); + } + fsel(tmp1, tmp1, tmp2, FCC0); + break; + + default: + Unimplemented(); + } + + movfr2gr_d(dst, tmp1); +} + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp, + bool is_float) { + switch(cmp) { + case EQ: + if (!is_float) { + fcmp_ceq_d(FCC0, op1, op2); + } else { + fcmp_ceq_s(FCC0, op1, op2); + } + fsel(dst, dst, src, FCC0); + break; + + case NE: + if (!is_float) { + fcmp_ceq_d(FCC0, op1, op2); + } else { + fcmp_ceq_s(FCC0, op1, op2); + } + fsel(dst, src, dst, FCC0); + break; + + case GT: + if (!is_float) { + fcmp_cule_d(FCC0, op1, op2); + } else { + fcmp_cule_s(FCC0, op1, op2); + } + fsel(dst, src, dst, FCC0); + break; + + case GE: + if (!is_float) { + fcmp_cult_d(FCC0, op1, op2); + } else { + fcmp_cult_s(FCC0, op1, op2); + } + fsel(dst, src, dst, FCC0); + break; + + case LT: + if (!is_float) { + fcmp_cult_d(FCC0, op1, op2); + } else { + fcmp_cult_s(FCC0, op1, op2); + } + fsel(dst, dst, src, FCC0); + break; + + case LE: + if (!is_float) { + fcmp_cule_d(FCC0, op1, op2); + } else { + fcmp_cule_s(FCC0, op1, op2); + } + fsel(dst, dst, src, FCC0); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp) { + movgr2fr_w(tmp1, R0); + + switch (cmp) { + case EQ: + sub_d(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, dst, src, FCC0); + break; + + case NE: + sub_d(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, src, dst, FCC0); + break; + + case GT: + slt(AT, op2, op1); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, src, dst, FCC0); + break; + + case GE: + slt(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, dst, src, FCC0); + break; + + case LT: + slt(AT, op1, op2); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, src, dst, FCC0); + break; + + case LE: + slt(AT, op2, op1); + movgr2fr_w(tmp2, AT); + fcmp_ceq_s(FCC0, tmp1, tmp2); + fsel(dst, dst, src, FCC0); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { + switch (type) { + case STORE_BYTE: st_b (reg, base, disp); break; + case STORE_CHAR: + case STORE_SHORT: st_h (reg, base, disp); break; + case STORE_INT: st_w (reg, base, disp); break; + case STORE_LONG: st_d (reg, base, disp); break; + case LOAD_BYTE: ld_b (reg, base, disp); break; + case LOAD_U_BYTE: ld_bu(reg, base, disp); break; + case LOAD_SHORT: ld_h (reg, base, disp); break; + case LOAD_U_SHORT: 
ld_hu(reg, base, disp); break; + case LOAD_INT: ld_w (reg, base, disp); break; + case LOAD_U_INT: ld_wu(reg, base, disp); break; + case LOAD_LONG: ld_d (reg, base, disp); break; + case LOAD_LINKED_LONG: + ll_d(reg, base, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(Register reg, Register base, Register disp, int type) { + switch (type) { + case STORE_BYTE: stx_b (reg, base, disp); break; + case STORE_CHAR: + case STORE_SHORT: stx_h (reg, base, disp); break; + case STORE_INT: stx_w (reg, base, disp); break; + case STORE_LONG: stx_d (reg, base, disp); break; + case LOAD_BYTE: ldx_b (reg, base, disp); break; + case LOAD_U_BYTE: ldx_bu(reg, base, disp); break; + case LOAD_SHORT: ldx_h (reg, base, disp); break; + case LOAD_U_SHORT: ldx_hu(reg, base, disp); break; + case LOAD_INT: ldx_w (reg, base, disp); break; + case LOAD_U_INT: ldx_wu(reg, base, disp); break; + case LOAD_LONG: ldx_d (reg, base, disp); break; + case LOAD_LINKED_LONG: + add_d(AT, base, disp); + ll_d(reg, AT, 0); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { + switch (type) { + case STORE_FLOAT: fst_s(reg, base, disp); break; + case STORE_DOUBLE: fst_d(reg, base, disp); break; + case STORE_VECTORX: vst (reg, base, disp); break; + case STORE_VECTORY: xvst (reg, base, disp); break; + case LOAD_FLOAT: fld_s(reg, base, disp); break; + case LOAD_DOUBLE: fld_d(reg, base, disp); break; + case LOAD_VECTORX: vld (reg, base, disp); break; + case LOAD_VECTORY: xvld (reg, base, disp); break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::loadstore(FloatRegister reg, Register base, Register disp, int type) { + switch (type) { + case STORE_FLOAT: fstx_s(reg, base, disp); break; + case STORE_DOUBLE: fstx_d(reg, base, disp); break; + case STORE_VECTORX: vstx (reg, base, disp); break; + case STORE_VECTORY: xvstx (reg, base, disp); break; + case LOAD_FLOAT: fldx_s(reg, base, disp); break; + case LOAD_DOUBLE: fldx_d(reg, base, disp); break; + case LOAD_VECTORX: vldx (reg, base, disp); break; + case LOAD_VECTORY: xvldx (reg, base, disp); break; + default: + ShouldNotReachHere(); + } +} + +/** + * Emits code to update CRC-32 with a byte value according to constants in table + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. 
+ * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); +**/ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + xorr(val, val, crc); + andi(val, val, 0xff); + ld_w(val, Address(table, val, Address::times_4, 0)); + srli_w(crc, crc, 8); + xorr(crc, val, crc); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param tmp scratch register +**/ +void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Register tmp) { + Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; + assert_different_registers(crc, buf, len, tmp); + + nor(crc, crc, R0); + + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by64_loop); + ld_d(tmp, buf, 0); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 8); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 16); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 24); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 32); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 40); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 48); + crc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 56); + crc_w_d_w(crc, tmp, crc); + addi_d(buf, buf, 64); + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by4_loop); + ld_w(tmp, buf, 0); + crc_w_w_w(crc, tmp, crc); + addi_d(buf, buf, 4); + addi_d(len, len, -4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + bge(R0, len, L_exit); + + bind(CRC_by1_loop); + ld_b(tmp, buf, 0); + crc_w_b_w(crc, tmp, crc); + addi_d(buf, buf, 1); + addi_d(len, len, -1); + blt(R0, len, CRC_by1_loop); + + bind(L_exit); + nor(crc, crc, R0); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param tmp scratch register +**/ +void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register tmp) { + Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop, CRC_less64, CRC_by64_pre, CRC_by32_loop, CRC_less32, L_exit; + assert_different_registers(crc, buf, len, tmp); + + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by64_loop); + ld_d(tmp, buf, 0); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 8); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 16); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 24); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 32); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 40); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 48); + crcc_w_d_w(crc, tmp, crc); + ld_d(tmp, buf, 56); + crcc_w_d_w(crc, tmp, crc); + addi_d(buf, buf, 64); + addi_d(len, len, -64); + bge(len, R0, CRC_by64_loop); + addi_d(len, len, 64-4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + blt(R0, len, CRC_by1_loop); + b(L_exit); + + bind(CRC_by4_loop); + ld_w(tmp, buf, 0); + crcc_w_w_w(crc, tmp, crc); + addi_d(buf, buf, 4); + addi_d(len, len, -4); + bge(len, R0, CRC_by4_loop); + addi_d(len, len, 4); + bge(R0, len, L_exit); + + bind(CRC_by1_loop); + ld_b(tmp, buf, 0); + 
crcc_w_b_w(crc, tmp, crc); + addi_d(buf, buf, 1); + addi_d(len, len, -1); + blt(R0, len, CRC_by1_loop); + + bind(L_exit); +} diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp new file mode 100644 index 00000000000..8b123c2906e --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.hpp @@ -0,0 +1,771 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP + +#include "asm/assembler.hpp" +#include "utilities/macros.hpp" +#include "runtime/rtmLocking.hpp" + + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +class MacroAssembler: public Assembler { + friend class LIR_Assembler; + friend class Runtime1; // as_Address() + + public: + // Compare code + typedef enum { + EQ = 0x01, + NE = 0x02, + GT = 0x03, + GE = 0x04, + LT = 0x05, + LE = 0x06 + } CMCompare; + + protected: + + Address as_Address(AddressLiteral adr); + Address as_Address(ArrayAddress adr); + + // Support for VM calls + // + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). +#ifdef CC_INTERP + // c++ interpreter never wants to use interp_masm version of call_VM + #define VIRTUAL +#else + #define VIRTUAL virtual +#endif + + VIRTUAL void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments // the number of arguments to pop after the call + ); + + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base + // returns the register which contains the thread upon return. If a thread register has been + // specified, the return value will correspond to that register. If no last_java_sp is specified + // (noreg) than sp will be used instead. 
+ VIRTUAL void call_VM_base( // returns the register containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call + bool check_exceptions // whether to check for pending exceptions after return + ); + + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); + + // helpers for FPU flag access + // tmp is a temporary register, if none is available use noreg + + public: + static intptr_t i[32]; + static float f[32]; + static void print(outputStream *s); + + static int i_offset(unsigned int k); + static int f_offset(unsigned int k); + + static void save_registers(MacroAssembler *masm); + static void restore_registers(MacroAssembler *masm); + + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + + void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. + void pd_patch_instruction(address branch, address target); + + address emit_trampoline_stub(int insts_call_instruction_offset, address target); + + // Support for inc/dec with optimal instruction selection depending on value + // void incrementl(Register reg, int value = 1); + // void decrementl(Register reg, int value = 1); + + + // Alignment + void align(int modulus); + + + // Stack frame creation/removal + void enter(); + void leave(); + + // Frame creation and destruction shared between JITs. + void build_frame(int framesize); + void remove_frame(int framesize); + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
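+  // The overloads below differ only in the number of register arguments and in
+  // whether an explicit last_java_sp is supplied; they all funnel into call_VM_base.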
+ + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, bool + check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result (Register oop_result, Register thread); + void get_vm_result_2(Register metadata_result, Register thread); + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2, Register arg_3); + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register thread, + Register last_java_sp, + Register last_java_fp, + Label& last_java_pc); + + // thread in the default location (S6) + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label& last_java_pc); + + void reset_last_Java_frame(Register thread, bool clear_fp); + + // thread in the default location (S6) + void reset_last_Java_frame(bool clear_fp); + + // Stores + void store_check(Register obj); // store check for obj - register is destroyed afterwards + void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) + + void resolve_jobject(Register value, Register thread, Register tmp); + void clear_jweak_tag(Register possibly_jweak); + +#if INCLUDE_ALL_GCS + + void g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + +#endif // INCLUDE_ALL_GCS + + // split store_check(Register obj) to enhance instruction interleaving + void store_check_part_1(Register obj); + void store_check_part_2(Register obj); + + // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 + void c2bool(Register x); + //add for compressedoops + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + void load_prototype_header(Register dst, Register src); + + void store_klass_gap(Register dst, Register src); + + void load_heap_oop(Register dst, Address src); + void store_heap_oop(Address dst, Register src); + void store_heap_oop_null(Address dst); + void encode_heap_oop(Register r); + void encode_heap_oop(Register dst, Register src); + void decode_heap_oop(Register r); + void decode_heap_oop(Register dst, Register src); + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop_not_null(Register dst, Register src); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register dst, Register src); + + // Returns the byte size of the instructions generated by decode_klass_not_null() + // when compressed klass pointers are being used. + static int instr_size_for_decode_klass_not_null(); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + DEBUG_ONLY(void verify_heapbase(const char* msg);) + + void set_narrow_klass(Register dst, Klass* k); + void set_narrow_oop(Register dst, jobject obj); + + // Sign extension + void sign_extend_short(Register reg) { ext_w_h(reg, reg); } + void sign_extend_byte(Register reg) { ext_w_b(reg, reg); } + void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + + void trigfunc(char trig, int num_fpu_regs_in_use = 1); + // allocation + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void incr_allocated_bytes(Register thread, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. 
+ void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + + + // Debugging + + // only if +VerifyOops + void verify_oop(Register reg, const char* s = "broken oop"); + void verify_oop_addr(Address addr, const char * s = "broken oop addr"); + void verify_oop_subroutine(); + // TODO: verify method and klass metadata (compare against vptr?) + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + + #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) + #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyFPU + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + + // prints msg and continues + void warn(const char* msg); + + static void debug(char* msg/*, RegistersForDebugging* regs*/); + static void debug64(char* msg, int64_t pc, int64_t regs[]); + + void untested() { stop("untested"); } + + void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, sizeof(b), "unimplemented: %s", what); stop(b); } + + void should_not_reach_here() { stop("should not reach here"); } + + void print_CPU_state(); + + // Stack overflow checking + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + if (offset <= 2048) { + st_w(RA0, SP, -offset); + } else if (offset <= 32768 && !(offset & 3)) { + stptr_w(RA0, SP, -offset); + } else { + li(AT, offset); + sub_d(AT, SP, AT); + st_w(RA0, AT, 0); + } + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Also, clobbers tmp + void bang_stack_size(Register size, Register tmp); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp); + + //void verify_tlab(); + void verify_tlab(Register t1, Register t2); + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // tmp_reg is optional. If it is supplied (i.e., != noreg) it will + // be killed; if not supplied, push/pop will be used internally to + // allocate a temporary (inefficient, avoid if possible). 
+ // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. + int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); +#ifdef COMPILER2 + void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); + void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); +#endif + + void round_to(Register reg, int modulus) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +#if 0 + assert_different_registers(reg, AT); + increment(reg, modulus - 1); + move(AT, - modulus); + andr(reg, reg, AT); +#endif + } + + // the follow two might use AT register, be sure you have no meanful data in AT before you call them + void increment(Register reg, int imm); + void decrement(Register reg, int imm); + void increment(Address addr, int imm = 1); + void decrement(Address addr, int imm = 1); + void shl(Register reg, int sa) { slli_d(reg, reg, sa); } + void shr(Register reg, int sa) { srli_d(reg, reg, sa); } + void sar(Register reg, int sa) { srai_d(reg, reg, sa); } + // Helper functions for statistics gathering. + void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); + + // Calls + void call(address entry); + void call(address entry, relocInfo::relocType rtype); + void call(address entry, RelocationHolder& rh); + void call_long(address entry); + + address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); + + static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); + + static bool far_branches() { + if (ForceUnreachable) { + return true; + } else { + return ReservedCodeCacheSize > branch_range; + } + } + + // Emit the CompiledIC call idiom + address ic_call(address entry); + + // Jumps + void jmp(address entry); + void jmp(address entry, relocInfo::relocType rtype); + void jmp_far(Label& L); // patchable + + /* branches may exceed 16-bit offset */ + void b_far(address entry); + void b_far(Label& L); + + void bne_far (Register rs, Register rt, address entry); + void bne_far (Register rs, Register rt, Label& L); + + void beq_far (Register rs, Register rt, address entry); + void beq_far (Register rs, Register rt, Label& L); + + void blt_far (Register rs, Register rt, address entry, bool is_signed); + void blt_far (Register rs, Register rt, Label& L, bool is_signed); + + void bge_far (Register rs, Register rt, address entry, bool is_signed); + void bge_far (Register rs, Register rt, Label& L, bool is_signed); + + // For C2 to support long branches + void beq_long (Register rs, Register rt, Label& L); + void bne_long (Register rs, Register rt, Label& L); + void blt_long (Register rs, Register rt, Label& L, bool is_signed); + void bge_long (Register rs, Register rt, Label& L, bool is_signed); + void bc1t_long (Label& L); + void bc1f_long (Label& L); + + static bool patchable_branches() { + const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); + return ReservedCodeCacheSize > branch_range; + } + 
+ static bool reachable_from_branch_short(jlong offs); + + void patchable_jump_far(Register ra, jlong offs); + void patchable_jump(address target, bool force_patchable = false); + void patchable_call(address target, address call_size = 0); + + // Floating + // Data + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs + inline void ld_ptr(Register rt, Address a) { + ld_d(rt, a); + } + + inline void ld_ptr(Register rt, Register base, int offset16) { + ld_d(rt, base, offset16); + } + + // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs + inline void st_ptr(Register rt, Address a) { + st_d(rt, a); + } + + inline void st_ptr(Register rt, Register base, int offset16) { + st_d(rt, base, offset16); + } + + void ld_ptr(Register rt, Register base, Register offset); + void st_ptr(Register rt, Register base, Register offset); + + // ld_long will perform lw for 32 bit VMs and ld for 64 bit VMs + // st_long will perform sw for 32 bit VMs and sd for 64 bit VMs + inline void ld_long(Register rt, Register base, int offset16); + inline void st_long(Register rt, Register base, int offset16); + inline void ld_long(Register rt, Address a); + inline void st_long(Register rt, Address a); + void ld_long(Register rt, Register offset, Register base); + void st_long(Register rt, Register offset, Register base); + + // swap the two byte of the low 16-bit halfword + // this directive will use AT, be sure the high 16-bit of reg is zero + void hswap(Register reg); + void huswap(Register reg); + + // convert big endian integer to little endian integer + void swap(Register reg); + + void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, + bool retold, bool barrier); + void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, + bool retold, bool barrier, Label& succ, Label* fail = NULL); + void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, + bool sign, bool retold, bool barrier); + void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, + bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); + + void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");} + void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");} + void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); } + void push (FloatRegister reg) { addi_d(SP, SP, -8); fst_d (reg, SP, 0); } + void pop (Register reg) { ld_d (reg, SP, 0); addi_d(SP, SP, 8); } + void pop (FloatRegister reg) { fld_d (reg, SP, 0); addi_d(SP, SP, 8); } + void pop () { addi_d(SP, SP, 8); } + void pop2 () { addi_d(SP, SP, 16); } + void push2(Register reg1, Register reg2); + void pop2 (Register reg1, Register reg2); + //we need 2 fun to save and resotre general register + void pushad(); + void popad(); + void pushad_except_v0(); + void popad_except_v0(); + + void li(Register rd, jlong value); + void li(Register rd, address addr) { li(rd, (long)addr); } + void patchable_li52(Register rd, jlong value); + void lipc(Register rd, Label& L); + void move(Register rd, Register rs) { orr(rd, rs, R0); } + void move_u32(Register rd, Register rs) { add_w(rd, rs, R0); } + void mov_metadata(Register dst, Metadata* obj); + 
void mov_metadata(Address dst, Metadata* obj); + + // Load the base of the cardtable byte map into reg. + void load_byte_map_base(Register reg); + + //FIXME + void empty_FPU_stack(){/*need implemented*/}; + + + // method handles (JSR 292) + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + + // LA added: + void jr (Register reg) { jirl(R0, reg, 0); } + void jalr(Register reg) { jirl(RA, reg, 0); } + void nop () { andi(R0, R0, 0); } + void andr(Register rd, Register rj, Register rk) { AND(rd, rj, rk); } + void xorr(Register rd, Register rj, Register rk) { XOR(rd, rj, rk); } + void orr (Register rd, Register rj, Register rk) { OR(rd, rj, rk); } + void lea (Register rd, Address src); + void lea (Register dst, AddressLiteral adr); + static int patched_branch(int dest_pos, int inst, int inst_pos); + + // Conditional move + void cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp = EQ, + bool is_signed = true); + void cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp = EQ, + bool is_float = true); + void cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp = EQ, + bool is_float = true); + void cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + FloatRegister tmp1, + FloatRegister tmp2, + CMCompare cmp = EQ); + + // CRC32 code for java.util.zip.CRC32::update() instrinsic. + void update_byte_crc32(Register crc, Register val, Register table); + + // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. + void kernel_crc32(Register crc, Register buf, Register len, Register tmp); + + // CRC32C code for java.util.zip.CRC32C::updateBytes() instrinsic. 
+ void kernel_crc32c(Register crc, Register buf, Register len, Register tmp); + +#undef VIRTUAL + + public: +// Memory Data Type +#define INT_TYPE 0x100 +#define FLOAT_TYPE 0x200 +#define SIGNED_TYPE 0x10 +#define UNSIGNED_TYPE 0x20 + + typedef enum { + LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, + LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, + LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, + LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, + LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, + STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, + STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, + STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, + STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, + STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, + LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, + + LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, + LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, + LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, + + LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, + LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, + LOAD_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x3, + LOAD_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x4, + STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x5, + STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x6, + STORE_VECTORX = FLOAT_TYPE | SIGNED_TYPE | 0x7, + STORE_VECTORY = FLOAT_TYPE | SIGNED_TYPE | 0x8 + } CMLoadStoreDataType; + + void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { + assert((type & INT_TYPE), "must be General reg type"); + loadstore_t(reg, base, index, scale, disp, type); + } + + void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { + assert((type & FLOAT_TYPE), "must be Float reg type"); + loadstore_t(reg, base, index, scale, disp, type); + } + +private: + template + void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { + if (index != 0) { + assert(((scale==0)&&(disp==0)), "only support base+index"); + loadstore(reg, as_Register(base), as_Register(index), type); + } else { + loadstore(reg, as_Register(base), disp, type); + } + } + void loadstore(Register reg, Register base, int disp, int type); + void loadstore(Register reg, Register base, Register disp, int type); + void loadstore(FloatRegister reg, Register base, int disp, int type); + void loadstore(FloatRegister reg, Register base, Register disp, int type); +}; + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and it's + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. + */ +class SkipIfEqual { + private: + MacroAssembler* _masm; + Label _label; + + public: + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); + ~SkipIfEqual(); +}; + +#ifdef ASSERT +inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } +#endif + +struct tableswitch { + Register _reg; + int _insn_index; jint _first_key; jint _last_key; + Label _after; + Label _branches; +}; + +#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp new file mode 100644 index 00000000000..0b265a4defb --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/macroAssembler_loongarch.inline.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2017, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp new file mode 100644 index 00000000000..b36216c5337 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/metaspaceShared_loongarch_64.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "memory/metaspaceShared.hpp" + +// Generate the self-patching vtable method: +// +// This method will be called (as any other Klass virtual method) with +// the Klass itself as the first argument. Example: +// +// oop obj; +// int size = obj->klass()->klass_part()->oop_size(this); +// +// for which the virtual method call is Klass::oop_size(); +// +// The dummy method is called with the Klass object as the first +// operand, and an object as the second argument. 
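+// Each dummy method loads an encoded <vtable, method-index> pair into T5 and
+// branches to a common stub that patches the receiver's vtable pointer and
+// then jumps to the real method (see common_code below).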
+// + +//===================================================================== + +// All of the dummy methods in the vtable are essentially identical, +// differing only by an ordinal constant, and they bear no releationship +// to the original method which the caller intended. Also, there needs +// to be 'vtbl_list_size' instances of the vtable in order to +// differentiate between the 'vtable_list_size' original Klass objects. + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +void MetaspaceShared::generate_vtable_methods(void** vtbl_list, + void** vtable, + char** md_top, + char* md_end, + char** mc_top, + char* mc_end) { + intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); + *(intptr_t *)(*md_top) = vtable_bytes; + *md_top += sizeof(intptr_t); + void** dummy_vtable = (void**)*md_top; + *vtable = dummy_vtable; + *md_top += vtable_bytes; + + // Get ready to generate dummy methods. + + CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); + MacroAssembler* masm = new MacroAssembler(&cb); + Label common_code; + for (int i = 0; i < vtbl_list_size; ++i) { + for (int j = 0; j < num_virtuals; ++j) { + dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); + + // Load T5 with a value indicating vtable/offset pair. + // -- bits[ 7..0] (8 bits) which virtual method in table? + // -- bits[12..8] (5 bits) which virtual method table? + // -- must fit in 13-bit instruction immediate field. + __ li(T5, (i << 8) + j); + __ b(common_code); + } + } + + __ bind(common_code); + + __ srli_d(T4, T5, 8); // isolate vtable identifier. + __ shl(T4, LogBytesPerWord); + __ li(AT, (long)vtbl_list); + __ ldx_d(T4, AT, T4); // get correct vtable address. + __ st_d(T4, A0, 0); // update vtable pointer. + + __ andi(T5, T5, 0x00ff); // isolate vtable method index + __ shl(T5, LogBytesPerWord); + __ ldx_d(T4, T4, T5); // address of real method pointer. + __ jr(T4); // get real method pointer. + + __ flush(); + + *mc_top = (char*)__ pc(); +} diff --git a/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp new file mode 100644 index 00000000000..cb31ca5ad5b --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.cpp @@ -0,0 +1,566 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) block_comment(error); __ stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + if (VerifyMethodHandles) + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + __ ld_d(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, err_msg("%s should be nonzero", xname)); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //ASSERT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { + Label L; + BLOCK_COMMENT("verify_ref_kind {"); + __ ld_w(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); + __ srai_w(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); + __ li(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); + __ andr(temp, temp, AT); + __ li(AT, ref_kind); + __ beq(temp, AT, L); + { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || + ref_kind == JVM_REF_invokeSpecial) + // could do this for all ref_kinds, but would explode assembly code size + trace_method_handle(_masm, buf); + __ STOP(buf); + } + BLOCK_COMMENT("} verify_ref_kind"); + __ bind(L); +} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { + assert(method == Rmethod, "interpreter calling convention"); + + Label L_no_such_method; + __ beq(method, R0, L_no_such_method); + + __ verify_method_ptr(method); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + Register rthread = TREG; + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? 
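That byte-sized test works because LoongArch is little-endian and interp_only_mode only ever holds small counts; a host-side sketch of the same trick that the ld_bu below relies on (names are illustrative):

#include <stdint.h>
#include <string.h>

static bool int_flag_nonzero_le(const int32_t* flag) {
  uint8_t low_byte;
  memcpy(&low_byte, flag, 1);   // byte 0 is the least-significant byte on little-endian
  return low_byte != 0;         // valid because the flag only takes small non-negative values
}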
+ __ ld_bu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); + __ beq(AT, R0, run_compiled_code); + __ ld_d(T4, method, in_bytes(Method::interpreter_entry_offset())); + __ jr(T4); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ld_d(T4, method, in_bytes(entry_offset)); + __ jr(T4); + + __ bind(L_no_such_method); + address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); + __ jmp(wrong_method, relocInfo::runtime_call_type); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. + assert_different_registers(recv, method_temp, temp2); + assert(recv != noreg, "required register"); + assert(method_temp == Rmethod, "required register for loading method"); + + //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); + __ verify_oop(method_temp); + // the following assumes that a Method* is normally compressed in the vmtarget field: + __ ld_d(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ld_d(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, + Address(temp2, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + Address recv_addr = __ argument_address(temp2, -1); + __ ld_d(AT, recv_addr); + __ beq(recv, AT, L); + + recv_addr = __ argument_address(temp2, -1); + __ ld_d(V0, recv_addr); + __ STOP("receiver not on stack"); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. 
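The three dependent loads in jump_to_lambda_form() above amount to the pointer chain sketched here; the struct names are purely illustrative stand-ins for the java.lang.invoke objects, not HotSpot types:

struct Method_     { void* interpreted_entry; };   // stands in for Method*
struct MemberName_ { Method_* vmtarget; };
struct LambdaForm_ { MemberName_* vmentry; };
struct MH_         { LambdaForm_* form; };

static void* resolve_invoker(const MH_* mh) {
  return mh->form->vmentry->vmtarget->interpreted_entry;   // MH -> form -> vmentry -> vmtarget
}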
+ __ stop("empty stubs make SG sick"); + return NULL; + } + + // Rmethod: Method* + // T4: argument locator (parameter slot count, added to sp) + // S7: used as temp to hold mh or receiver + Register t4_argp = T4; // argument list ptr, live on error paths + Register s7_mh = S7; // MH receiver; dies quickly and is recycled + Register rm_method = Rmethod; // eventual target of this invocation + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ ld_bu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); + guarantee(Assembler::is_simm(iid, 12), "Oops, iid is not simm16! Change the instructions."); + __ addi_d(AT, AT, -1 * (int) iid); + __ beq(AT, R0, L); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ STOP("bad Method*::intrinsic_id"); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. + Address t4_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ld_d(t4_argp, Address(rm_method, Method::const_offset())); + __ load_sized_value(t4_argp, + Address(t4_argp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + t4_first_arg_addr = __ argument_address(t4_argp, -1); + } else { + DEBUG_ONLY(t4_argp = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ld_d(s7_mh, t4_first_arg_addr); + DEBUG_ONLY(t4_argp = noreg); + } + + // t4_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register r_recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. + __ ld_d(r_recv = T2, t4_first_arg_addr); + } + DEBUG_ONLY(t4_argp = noreg); + Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now + __ pop(rm_member); // extract last argument + generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); + } + + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + Register rm_method = Rmethod; // eventual target of this invocation + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register j_rarg0 = T0; + Register j_rarg1 = A0; + Register j_rarg2 = A1; + Register j_rarg3 = A2; + Register j_rarg4 = A3; + Register j_rarg5 = A4; + + Register temp1 = T8; + Register temp2 = T4; + Register temp3 = T5; + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? 
noreg : j_rarg0), "only valid assignment"); + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + } + else { + assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP + } + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + + if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz); + load_klass_from_Class(_masm, temp2_defc); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); + // If we get here, the type check failed! 
+ __ STOP("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ ld_d(rm_method, member_vmtarget); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ ld_d(rm_method, member_vmtarget); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ ld_d(temp2_index, member_vmindex); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ blt(R0, temp2_index, L_index_ok); + __ STOP("no virtual index"); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. + + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf); + __ verify_klass_ptr(temp3_intf); + + Register rm_index = rm_method; + __ ld_d(rm_index, member_vmindex); + if (VerifyMethodHandles) { + Label L; + __ bge(rm_index, R0, L); + __ STOP("invalid vtable index for MH.invokeInterface"); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rm_index, rm_method, + temp2, + L_incompatible_class_change_error); + break; + } + + default: + fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + break; + } + + // Live at this point: + // rm_method + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that r_recv be shifted out. + __ verify_method_ptr(rm_method); + jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); + __ jmp(icce_entry, relocInfo::runtime_call_type); + } + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oop mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { + // called as a leaf from native code: do not block the JVM! 
+ bool has_mh = (strstr(adaptername, "/static") == NULL && + strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH + const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; + tty->print_cr("MH %s %s="PTR_FORMAT" sp="PTR_FORMAT, + adaptername, mh_reg_name, + p2i(mh), p2i(entry_sp)); + + if (Verbose) { + tty->print_cr("Registers:"); + const int saved_regs_count = RegisterImpl::number_of_registers; + for (int i = 0; i < saved_regs_count; i++) { + Register r = as_Register(i); + // The registers are stored in reverse order on the stack (by pusha). + tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); + if ((i + 1) % 4 == 0) { + tty->cr(); + } else { + tty->print(", "); + } + } + tty->cr(); + + { + // dumping last frame with frame::describe + + JavaThread* p = JavaThread::active(); + + ResourceMark rm; + PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here + FrameValues values; + + // Note: We want to allow trace_method_handle from any call site. + // While trace_method_handle creates a frame, it may be entered + // without a PC on the stack top (e.g. not just after a call). + // Walking that frame could lead to failures due to that invalid PC. + // => carefully detect that frame when doing the stack walking + + // Current C frame + frame cur_frame = os::current_frame(); + + // Robust search of trace_calling_frame (independant of inlining). + // Assumes saved_regs comes from a pusha in the trace_calling_frame. + assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); + frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); + while (trace_calling_frame.fp() < saved_regs) { + trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); + } + + // safely create a frame and call frame::describe + intptr_t *dump_sp = trace_calling_frame.sender_sp(); + intptr_t *dump_fp = trace_calling_frame.link(); + + bool walkable = has_mh; // whether the traced frame shoud be walkable + + if (walkable) { + // The previous definition of walkable may have to be refined + // if new call sites cause the next frame constructor to start + // failing. Alternatively, frame constructors could be + // modified to support the current or future non walkable + // frames (but this is more intrusive and is not considered as + // part of this RFE, which will instead use a simpler output). + frame dump_frame = frame(dump_sp, dump_fp); + dump_frame.describe(values, 1); + } else { + // Stack may not be walkable (invalid PC above FP): + // Add descriptions without building a Java frame to avoid issues + values.describe(-1, dump_fp, "fp for #1 "); + values.describe(-1, dump_sp, "sp for #1"); + } + values.describe(-1, entry_sp, "raw top of stack"); + + tty->print_cr("Stack layout:"); + values.print(p); + } + if (has_mh && mh->is_oop()) { + mh->print(); + if (java_lang_invoke_MethodHandle::is_instance(mh)) { + if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) + java_lang_invoke_MethodHandle::form(mh)->print(); + } + } + } +} + +// The stub wraps the arguments in a struct on the stack to avoid +// dealing with the different calling conventions for passing 6 +// arguments. 
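The dump loop above indexes saved_regs back-to-front because the save sequence pushes the registers in ascending order; restated as a hypothetical helper:

#include <stdint.h>

static intptr_t saved_value_of(const intptr_t* saved_regs, int n_regs, int reg_index) {
  return saved_regs[(n_regs - 1) - reg_index];   // register i sits (n_regs-1-i) slots in, as in the loop above
}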
+struct MethodHandleStubArguments { + const char* adaptername; + oopDesc* mh; + intptr_t* saved_regs; + intptr_t* entry_sp; +}; +void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { + trace_method_handle_stub(args->adaptername, + args->mh, + args->saved_regs, + args->entry_sp); +} + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { +} +#endif //PRODUCT diff --git a/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp new file mode 100644 index 00000000000..f84337424b9 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/methodHandles_loongarch.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. + +// Adapters +enum /* platform_dependent_constants */ { + adapter_code_size = 32000 DEBUG_ONLY(+ 150000) +}; + +// Additional helper methods for MethodHandles code generation: +public: + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry); + + static Register saved_last_sp_register() { + // Should be in sharedRuntime, not here. + return R3; + } diff --git a/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp new file mode 100644 index 00000000000..639ac6cd3e8 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.cpp @@ -0,0 +1,485 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "compiler/disassembler.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif + +#ifndef PRODUCT +#include "compiler/disassembler.hpp" +#endif + +#include + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +void NativeInstruction::wrote(int offset) { + ICache::invalidate_word(addr_at(offset)); +} + +void NativeInstruction::set_long_at(int offset, long i) { + address addr = addr_at(offset); + *(long*)addr = i; + ICache::invalidate_range(addr, 8); +} + +bool NativeInstruction::is_int_branch() { + int op = Assembler::high(insn_word(), 6); + return op == Assembler::beqz_op || op == Assembler::bnez_op || + op == Assembler::beq_op || op == Assembler::bne_op || + op == Assembler::blt_op || op == Assembler::bge_op || + op == Assembler::bltu_op || op == Assembler::bgeu_op; +} + +bool NativeInstruction::is_float_branch() { + return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; +} + +bool NativeCall::is_bl() const { + return Assembler::high(int_at(0), 6) == Assembler::bl_op; +} + +void NativeCall::verify() { + assert(is_bl(), "not a NativeCall"); +} + +address NativeCall::target_addr_for_bl(address orig_addr) const { + address addr = orig_addr ? orig_addr : addr_at(0); + + // bl + if (is_bl()) { + return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | + ((int_at(0) >> 10) & 0xffff)) << 2); + } + + fatal("not a NativeCall"); + return NULL; +} + +address NativeCall::destination() const { + address addr = (address)this; + address destination = target_addr_for_bl(); + // Do we use a trampoline stub for this call? + // Trampoline stubs are located behind the main code. + if (destination > addr) { + // Filter out recursive method invocation (call to verified/unverified entry point). + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. 
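is_int_branch(), is_bl() and target_addr_for_bl() above all pattern-match fixed fields of a 32-bit instruction word. A sketch of the assumed helper semantics and of the b/bl offset decoding; the Assembler::high/low behaviour is inferred from how they are used here, so treat it as an assumption:

#include <stdint.h>

static inline uint32_t insn_high(uint32_t insn, int n) { return insn >> (32 - n); }         // top n bits
static inline uint32_t insn_low (uint32_t insn, int n) { return insn & ((1u << n) - 1u); }  // bottom n bits (n < 32)

// Byte offset encoded by a LoongArch b/bl: insn[25:10] holds offs[15:0], insn[9:0] holds
// offs[25:16]; the target is pc + (sign_extend(offs26) << 2), as target_addr_for_bl() computes.
static inline intptr_t bl_byte_offset(uint32_t insn) {
  uint32_t offs26   = ((insn & 0x3ff) << 16) | ((insn >> 10) & 0xffff);
  int32_t  signed26 = (int32_t)(offs26 << 6) >> 6;   // sign-extend from bit 25
  return (intptr_t)signed26 << 2;                    // word offset -> byte offset
}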
+ assert(cb && cb->is_nmethod(), "sanity"); + nmethod *nm = (nmethod *)cb; + NativeInstruction* ni = nativeInstruction_at(destination); + if (nm->stub_contains(destination) && ni->is_NativeCallTrampolineStub_at()) { + // Yes we do, so get the destination from the trampoline stub. + const address trampoline_stub_addr = destination; + destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + } + return destination; +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. +// +// Used in the runtime linkage of calls; see class CompiledIC. +// +// Add parameter assert_lock to switch off assertion +// during code generation, where no patching lock is needed. +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || + (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + address addr_call = addr_at(0); + bool reachable = MacroAssembler::reachable_from_branch_short(dest - addr_call); + assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + + // Patch the call. + if (!reachable) { + address trampoline_stub_addr = get_trampoline(); + assert (trampoline_stub_addr != NULL, "we need a trampoline"); + guarantee(Assembler::is_simm((trampoline_stub_addr - addr_call) >> 2, 26), "cannot reach trampoline stub"); + + // Patch the constant in the call's trampoline stub. + NativeInstruction* ni = nativeInstruction_at(dest); + assert (! ni->is_NativeCallTrampolineStub_at(), "chained trampolines"); + nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); + dest = trampoline_stub_addr; + } + set_destination(dest); +} + +address NativeCall::get_trampoline() { + address call_addr = addr_at(0); + + CodeBlob *code = CodeCache::find_blob(call_addr); + assert(code != NULL, "Could not find the containing code blob"); + + address bl_destination + = nativeCall_at(call_addr)->target_addr_for_bl(); + NativeInstruction* ni = nativeInstruction_at(bl_destination); + if (code->contains(bl_destination) && + ni->is_NativeCallTrampolineStub_at()) + return bl_destination; + + // If the codeBlob is not a nmethod, this is because we get here from the + // CodeBlob constructor, which is called within the nmethod constructor. + return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); +} + +void NativeCall::set_destination(address dest) { + address addr_call = addr_at(0); + CodeBuffer cb(addr_call, instruction_size); + MacroAssembler masm(&cb); + assert(is_call_at(addr_call), "unexpected call type"); + jlong offs = dest - addr_call; + masm.bl(offs >> 2); + ICache::invalidate_range(addr_call, instruction_size); +} + +void NativeCall::print() { + tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, + p2i(instruction_address()), p2i(destination())); +} + +// Inserts a native call instruction at a given pc +void NativeCall::insert(address code_pos, address entry) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// MT-safe patching of a call instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. 
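set_destination_mt_safe() above only patches the bl directly when the target fits its signed 26-bit word offset, roughly +/-128 MB; otherwise it routes through the trampoline whose distance the guarantee checks. A sketch of that range test (the real predicate is MacroAssembler::reachable_from_branch_short, whose definition is not shown in this hunk):

static inline bool bl_offset_reachable(long byte_offset) {
  const long limit = 1L << 27;   // 2^25 words in either direction, 4 bytes per word
  return byte_offset >= -limit && byte_offset < limit && (byte_offset & 3) == 0;
}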
+void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { + Unimplemented(); +} + +bool NativeFarCall::is_short() const { + return Assembler::high(int_at(0), 10) == Assembler::andi_op && + Assembler::low(int_at(0), 22) == 0 && + Assembler::high(int_at(4), 6) == Assembler::bl_op; +} + +bool NativeFarCall::is_far() const { + return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && + Assembler::high(int_at(4), 6) == Assembler::jirl_op && + Assembler::low(int_at(4), 5) == RA->encoding(); +} + +address NativeFarCall::destination(address orig_addr) const { + address addr = orig_addr ? orig_addr : addr_at(0); + + if (is_short()) { + // short + return addr + BytesPerInstWord + + (Assembler::simm26(((int_at(4) & 0x3ff) << 16) | + ((int_at(4) >> 10) & 0xffff)) << 2); + } + + if (is_far()) { + // far + return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + + (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); + } + + fatal("not a NativeFarCall"); + return NULL; +} + +void NativeFarCall::set_destination(address dest) { + address addr_call = addr_at(0); + CodeBuffer cb(addr_call, instruction_size); + MacroAssembler masm(&cb); + assert(is_far_call_at(addr_call), "unexpected call type"); + masm.patchable_call(dest, addr_call); + ICache::invalidate_range(addr_call, instruction_size); +} + +void NativeFarCall::verify() { + assert(is_short() || is_far(), "not a NativeFarcall"); +} + +//------------------------------------------------------------------- + +bool NativeMovConstReg::is_lu12iw_ori_lu32id() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 10) == Assembler::ori_op && + Assembler::high(int_at(8), 7) == Assembler::lu32i_d_op; +} + +bool NativeMovConstReg::is_lu12iw_lu32id_nop() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +bool NativeMovConstReg::is_lu12iw_2nop() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 10) == Assembler::andi_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +bool NativeMovConstReg::is_lu12iw_ori_nop() const { + return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && + Assembler::high(int_at(4), 10) == Assembler::ori_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +bool NativeMovConstReg::is_addid_2nop() const { + return Assembler::high(int_at(0), 10) == Assembler::addi_d_op && + Assembler::high(int_at(4), 10) == Assembler::andi_op && + Assembler::high(int_at(8), 10) == Assembler::andi_op; +} + +void NativeMovConstReg::verify() { + assert(is_li52(), "not a mov reg, imm52"); +} + +void NativeMovConstReg::print() { + tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, + p2i(instruction_address()), data()); +} + +intptr_t NativeMovConstReg::data() const { + if (is_lu12iw_ori_lu32id()) { + return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), + (intptr_t)((int_at(0) >> 5) & 0xfffff), + (intptr_t)((int_at(8) >> 5) & 0xfffff)); + } + + if (is_lu12iw_lu32id_nop()) { + return Assembler::merge((intptr_t)0, + (intptr_t)((int_at(0) >> 5) & 0xfffff), + (intptr_t)((int_at(4) >> 5) & 0xfffff)); + } + + if (is_lu12iw_2nop()) { + return Assembler::merge((intptr_t)0, + (intptr_t)((int_at(0) >> 5) & 0xfffff)); + } + + if (is_lu12iw_ori_nop()) { + return Assembler::merge((intptr_t)((int_at(4) >> 10) & 0xfff), + 
(intptr_t)((int_at(0) >> 5) & 0xfffff)); + } + + if (is_addid_2nop()) { + return Assembler::simm12((int_at(0) >> 10) & 0xfff); + } + +#ifndef PRODUCT + Disassembler::decode(addr_at(0), addr_at(0) + 16, tty); +#endif + fatal("not a mov reg, imm52"); + return 0; // unreachable +} + +void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { + CodeBuffer cb(addr_at(0), instruction_size); + MacroAssembler masm(&cb); + masm.patchable_li52(as_Register(int_at(0) & 0x1f), x); + ICache::invalidate_range(addr_at(0), instruction_size); + + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. + CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); + nmethod* nm = blob->as_nmethod_or_null(); + if (nm != NULL) { + o = o ? o : x; + RelocIterator iter(nm, instruction_address(), next_instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop* oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(o); + break; + } else if (iter.type() == relocInfo::metadata_type) { + Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)o; + break; + } + } + } +} + +//------------------------------------------------------------------- + +int NativeMovRegMem::offset() const{ + //TODO: LA + guarantee(0, "LA not implemented yet"); + return 0; // mute compiler +} + +void NativeMovRegMem::set_offset(int x) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +void NativeMovRegMem::verify() { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + + +void NativeMovRegMem::print() { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +bool NativeInstruction::is_sigill_zombie_not_entrant() { + return uint_at(0) == NativeIllegalInstruction::instruction_code; +} + +void NativeIllegalInstruction::insert(address code_pos) { + *(juint*)code_pos = instruction_code; + ICache::invalidate_range(code_pos, instruction_size); +} + +void NativeJump::verify() { + assert(is_short() || is_far(), "not a general jump instruction"); +} + +bool NativeJump::is_short() { + return Assembler::high(insn_word(), 6) == Assembler::b_op; +} + +bool NativeJump::is_far() { + return Assembler::high(int_at(0), 7) == Assembler::pcaddu18i_op && + Assembler::high(int_at(4), 6) == Assembler::jirl_op && + Assembler::low(int_at(4), 5) == R0->encoding(); +} + +address NativeJump::jump_destination(address orig_addr) { + address addr = orig_addr ? orig_addr : addr_at(0); + + // short + if (is_short()) { + return addr + (Assembler::simm26(((int_at(0) & 0x3ff) << 16) | + ((int_at(0) >> 10) & 0xffff)) << 2); + } + + // far + if (is_far()) { + return addr + ((intptr_t)Assembler::simm20(int_at(0) >> 5 & 0xfffff) << 18) + + (Assembler::simm16(int_at(4) >> 10 & 0xffff) << 2); + } + + fatal("not a jump"); + return NULL; +} + +void NativeJump::set_jump_destination(address dest) { + OrderAccess::fence(); + + CodeBuffer cb(addr_at(0), instruction_size); + MacroAssembler masm(&cb); + masm.patchable_jump(dest); + ICache::invalidate_range(addr_at(0), instruction_size); +} + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// MT-safe patching of a long jump instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. 
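NativeMovConstReg::data() above reassembles a 52-bit immediate from the ori/lu12i_w/lu32i_d fields via Assembler::merge. A sketch of the assumed bit split; the field placement and sign-extension are inferred from the li52 pattern, not taken verbatim from this patch:

#include <stdint.h>

static int64_t merge_li52(uint32_t ori_imm12, uint32_t lu12iw_imm20, uint32_t lu32id_imm20) {
  int64_t v = ((int64_t)lu32id_imm20 << 32)     // bits [51:32]
            | ((int64_t)lu12iw_imm20 << 12)     // bits [31:12]
            | (int64_t)ori_imm12;               // bits [11:0]
  if (v & (1LL << 51)) v |= ~((1LL << 52) - 1); // sign-extend from bit 51
  return v;
}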
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + //TODO: LA + guarantee(0, "LA not implemented yet"); +} + +// Must ensure atomicity +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + jlong offs = dest - verified_entry; + + if (MacroAssembler::reachable_from_branch_short(offs)) { + CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.b(dest); + } else { + // We use an illegal instruction for marking a method as + // not_entrant or zombie + NativeIllegalInstruction::insert(verified_entry); + } + ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); +} + +bool NativeInstruction::is_dtrace_trap() { + //return (*(int32_t*)this & 0xff) == 0xcc; + Unimplemented(); + return false; +} + +bool NativeInstruction::is_safepoint_poll() { + // + // 390 li T2, 0x0000000000400000 #@loadConP + // 394 st_w [SP + #12], V1 # spill 9 + // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 + // + // 0x000000ffe5815130: lu12i_w t2, 0x400 + // 0x000000ffe5815134: st_w v1, 0xc(sp) ; OopMap{a6=Oop off=920} + // ;*goto + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + // 0x000000ffe5815138: ld_w at, 0x0(t2) ;*goto <--- PC + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + + // Since there may be some spill instructions between the safePoint_poll and loadConP, + // we check the safepoint instruction like this. + return Assembler::high(insn_word(), 10) == Assembler::ld_w_op && + Assembler::low(insn_word(), 5) == AT->encoding(); +} diff --git a/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp new file mode 100644 index 00000000000..493239923b5 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/nativeInst_loongarch.hpp @@ -0,0 +1,513 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP + +#include "asm/assembler.hpp" +#include "memory/allocation.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" +#include "utilities/top.hpp" + +// We have interfaces for the following instructions: +// - NativeInstruction +// - - NativeCall +// - - NativeMovConstReg +// - - NativeMovConstRegPatching +// - - NativeMovRegMem +// - - NativeMovRegMemPatching +// - - NativeIllegalOpCode +// - - NativeGeneralJump +// - - NativePushConst +// - - NativeTstRegMem + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. + +class NativeInstruction VALUE_OBJ_CLASS_SPEC { + friend class Relocation; + + public: + enum loongarch_specific_constants { + nop_instruction_code = 0, + nop_instruction_size = 4, + sync_instruction_code = 0xf + }; + + bool is_nop() { guarantee(0, "LA not implemented yet"); return long_at(0) == nop_instruction_code; } + bool is_sync() { return Assembler::high(insn_word(), 17) == Assembler::dbar_op; } + bool is_dtrace_trap(); + inline bool is_call(); + inline bool is_far_call(); + inline bool is_illegal(); + bool is_jump(); + bool is_safepoint_poll(); + + // LoongArch has no instruction to generate a illegal instrucion exception? + // But `break 11` is not illegal instruction for LoongArch. + static int illegal_instruction(); + + bool is_int_branch(); + bool is_float_branch(); + + inline bool is_NativeCallTrampolineStub_at(); + //We use an illegal instruction for marking a method as not_entrant or zombie. + bool is_sigill_zombie_not_entrant(); + + protected: + address addr_at(int offset) const { return address(this) + offset; } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(BytesPerInstWord); } + address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } + + s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } + u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } + + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + + intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } + + oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + int long_at(int offset) const { return *(jint*)addr_at(offset); } + + + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } + void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } + void set_long_at(int offset, long i); + + int insn_word() const { return long_at(0); } + + void wrote(int offset); + + public: + + // unit test stuff + static void test() {} // override for testing + + inline friend NativeInstruction* nativeInstruction_at(address address); +}; + +inline NativeInstruction* nativeInstruction_at(address address) { + NativeInstruction* inst = (NativeInstruction*)address; +#ifdef ASSERT + //inst->verify(); +#endif + return inst; +} + +inline NativeCall* nativeCall_at(address address); + +// The NativeCall is an abstraction for accessing/manipulating native call +// instructions (used to manipulate inline caches, primitive & dll 
calls, etc.). +class NativeCall: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 1 * BytesPerInstWord, + return_address_offset = 1 * BytesPerInstWord, + displacement_offset = 0 + }; + + // We have only bl. + bool is_bl() const; + + address instruction_address() const { return addr_at(instruction_offset); } + + address next_instruction_address() const { + return addr_at(return_address_offset); + } + + address return_address() const { + return next_instruction_address(); + } + + address target_addr_for_bl(address orig_addr = 0) const; + address destination() const; + void set_destination(address dest); + + void verify_alignment() {} + void verify(); + void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address address); + inline friend NativeCall* nativeCall_before(address return_address); + + static bool is_call_at(address instr) { + return nativeInstruction_at(instr)->is_call(); + } + + static bool is_call_before(address return_address) { + return is_call_at(return_address - return_address_offset); + } + + // MT-safe patching of a call instruction. + static void insert(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); + + // Similar to replace_mt_safe, but just changes the destination. The + // important thing is that free-running threads are able to execute + // this call instruction at all times. If the call is an immediate bl + // instruction we can simply rely on atomicity of 32-bit writes to + // make sure other threads will see no intermediate states. + + // We cannot rely on locks here, since the free-running threads must run at + // full speed. + // + // Used in the runtime linkage of calls; see class CompiledIC. + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); + +}; + +inline NativeCall* nativeCall_at(address address) { + NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +// The NativeFarCall is an abstraction for accessing/manipulating native +// call-anywhere instructions. +// Used to call native methods which may be loaded anywhere in the address +// space, possibly out of reach of a call instruction. +class NativeFarCall: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_size = 2 * BytesPerInstWord, + }; + + // We use MacroAssembler::patchable_call() for implementing a + // call-anywhere instruction. + bool is_short() const; + bool is_far() const; + + // Checks whether instr points at a NativeFarCall instruction. + static bool is_far_call_at(address address) { + return nativeInstruction_at(address)->is_far_call(); + } + + // Returns the NativeFarCall's destination. + address destination(address orig_addr = 0) const; + + // Sets the NativeFarCall's destination, not necessarily mt-safe. + // Used when relocating code. + void set_destination(address dest); + + void verify(); +}; + +// Instantiates a NativeFarCall object starting at the given instruction +// address and returns the NativeFarCall object. 
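The comment above about relying on "atomicity of 32-bit writes" boils down to republishing the single call word with one aligned store; a minimal sketch of that property (the VM path additionally invalidates the icache, as NativeCall::set_destination does):

#include <stdint.h>

static void patch_insn_word(volatile uint32_t* slot, uint32_t new_insn) {
  *slot = new_insn;   // one aligned 4-byte store: a racing thread sees old or new, never a torn mix
  // followed in the VM by ICache::invalidate_range((address)slot, 4)
}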
+inline NativeFarCall* nativeFarCall_at(address address) { + NativeFarCall* call = (NativeFarCall*)address; +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +// An interface for accessing/manipulating native set_oop imm, reg instructions +// (used to manipulate inlined data references, etc.). +class NativeMovConstReg: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 3 * BytesPerInstWord, + next_instruction_offset = 3 * BytesPerInstWord, + }; + + int insn_word() const { return long_at(instruction_offset); } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + intptr_t data() const; + void set_data(intptr_t x, intptr_t o = 0); + + bool is_li52() const { + return is_lu12iw_ori_lu32id() || + is_lu12iw_lu32id_nop() || + is_lu12iw_2nop() || + is_lu12iw_ori_nop() || + is_addid_2nop(); + } + bool is_lu12iw_ori_lu32id() const; + bool is_lu12iw_lu32id_nop() const; + bool is_lu12iw_2nop() const; + bool is_lu12iw_ori_nop() const; + bool is_addid_2nop() const; + void verify(); + void print(); + + // unit test stuff + static void test() {} + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address address); + inline friend NativeMovConstReg* nativeMovConstReg_before(address address); +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovConstRegPatching: public NativeMovConstReg { + private: + friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { + NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + +class NativeMovRegMem: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 4, + hiword_offset = 4, + ldst_offset = 12, + immediate_size = 4, + ldst_size = 16 + }; + + address instruction_address() const { return addr_at(instruction_offset); } + + int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } + + int offset() const; + + void set_offset(int x); + + void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } + + void verify(); + void print (); + + // unit test stuff + static void test() {} + + private: + inline friend NativeMovRegMem* nativeMovRegMem_at (address address); +}; + +inline NativeMovRegMem* nativeMovRegMem_at (address address) { + NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovRegMemPatching: public NativeMovRegMem { + private: + friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { + NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + + +// Handles all kinds of jump on Loongson. 
+// short: +// b offs26 +// nop +// +// far: +// pcaddu18i reg, si20 +// jirl r0, reg, si18 +// +class NativeJump: public NativeInstruction { + public: + enum loongarch_specific_constants { + instruction_offset = 0, + instruction_size = 2 * BytesPerInstWord + }; + + bool is_short(); + bool is_far(); + + address instruction_address() const { return addr_at(instruction_offset); } + address jump_destination(address orig_addr = 0); + void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + // Insertion of native jump instruction + static void insert(address code_pos, address entry) { Unimplemented(); } + // MT-safe insertion of native jump at verified method entry + static void check_verified_entry_alignment(address entry, address verified_entry){} + static void patch_verified_entry(address entry, address verified_entry, address dest); + + void verify(); +}; + +inline NativeJump* nativeJump_at(address address) { + NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); + debug_only(jump->verify();) + return jump; +} + +class NativeGeneralJump: public NativeJump { + public: + // Creation + inline friend NativeGeneralJump* nativeGeneralJump_at(address address); + + // Insertion of native general jump instruction + static void insert_unconditional(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); +}; + +inline NativeGeneralJump* nativeGeneralJump_at(address address) { + NativeGeneralJump* jump = (NativeGeneralJump*)(address); + debug_only(jump->verify();) + return jump; +} + +class NativeIllegalInstruction: public NativeInstruction { +public: + enum loongarch_specific_constants { + instruction_code = 0xbadc0de0, // TODO: LA + // Temporary LoongArch reserved instruction + instruction_size = 4, + instruction_offset = 0, + next_instruction_offset = 4 + }; + + // Insert illegal opcode as specific address + static void insert(address code_pos); +}; + +inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } + +inline bool NativeInstruction::is_call() { + NativeCall *call = (NativeCall*)instruction_address(); + return call->is_bl(); +} + +inline bool NativeInstruction::is_far_call() { + NativeFarCall *call = (NativeFarCall*)instruction_address(); + + // short + if (call->is_short()) { + return true; + } + + // far + if (call->is_far()) { + return true; + } + + return false; +} + +inline bool NativeInstruction::is_jump() +{ + NativeGeneralJump *jump = (NativeGeneralJump*)instruction_address(); + + // short + if (jump->is_short()) { + return true; + } + + // far + if (jump->is_far()) { + return true; + } + + return false; +} + +// Call trampoline stubs. +class NativeCallTrampolineStub : public NativeInstruction { + public: + + enum la_specific_constants { + instruction_size = 6 * 4, + instruction_offset = 0, + data_offset = 4 * 4, + next_instruction_offset = 6 * 4 + }; + + address destination() const { + return (address)ptr_at(data_offset); + } + + void set_destination(address new_destination) { + set_ptr_at(data_offset, (intptr_t)new_destination); + OrderAccess::fence(); + } +}; + +// Note: Other stubs must not begin with this pattern. 
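Putting the offsets above together with the pcaddi/ld_d/jirl pattern matched just below, the stub occupies 24 bytes with its 8-byte target at data_offset; a layout sketch in which the +12 padding slot is an assumption:

#include <stdint.h>

struct TrampolineStubLayoutSketch {
  uint32_t pcaddi;        // +0   materialize the address of 'destination'
  uint32_t ld_d;          // +4   load the real call target
  uint32_t jirl;          // +8   indirect jump, rd = r0 so no link
  uint32_t pad;           // +12  keeps 'destination' 8-byte aligned (assumed nop/padding)
  uint64_t destination;   // +16  == data_offset, patched by set_destination(), read by destination()
};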
+inline bool NativeInstruction::is_NativeCallTrampolineStub_at() { + // pcaddi + // ld_d + // jirl + return Assembler::high(int_at(0), 7) == Assembler::pcaddi_op && + Assembler::high(int_at(4), 10) == Assembler::ld_d_op && + Assembler::high(int_at(8), 6) == Assembler::jirl_op && + Assembler::low(int_at(8), 5) == R0->encoding(); +} + +inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { + NativeInstruction* ni = nativeInstruction_at(addr); + assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); + return (NativeCallTrampolineStub*)addr; +} +#endif // CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp new file mode 100644 index 00000000000..5ff7555d2f0 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/registerMap_loongarch.hpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP + +// machine-dependent implemention for register maps + friend class frame; + + private: + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. + // Since there is none, we just return NULL. + // See registerMap_sparc.hpp for an example of grabbing registers + // from register save areas of a standard layout. + address pd_location(VMReg reg) const {return NULL;} + + // no PD state to clear or copy: + void pd_clear() {} + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp new file mode 100644 index 00000000000..c6424c321f1 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/register_definitions_loongarch.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/register.hpp" +#include "register_loongarch.hpp" +#ifdef TARGET_ARCH_MODEL_loongarch_32 +# include "interp_masm_loongarch_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_loongarch_64 +# include "interp_masm_loongarch_64.hpp" +#endif + +REGISTER_DEFINITION(Register, noreg); +REGISTER_DEFINITION(Register, r0); +REGISTER_DEFINITION(Register, r1); +REGISTER_DEFINITION(Register, r2); +REGISTER_DEFINITION(Register, r3); +REGISTER_DEFINITION(Register, r4); +REGISTER_DEFINITION(Register, r5); +REGISTER_DEFINITION(Register, r6); +REGISTER_DEFINITION(Register, r7); +REGISTER_DEFINITION(Register, r8); +REGISTER_DEFINITION(Register, r9); +REGISTER_DEFINITION(Register, r10); +REGISTER_DEFINITION(Register, r11); +REGISTER_DEFINITION(Register, r12); +REGISTER_DEFINITION(Register, r13); +REGISTER_DEFINITION(Register, r14); +REGISTER_DEFINITION(Register, r15); +REGISTER_DEFINITION(Register, r16); +REGISTER_DEFINITION(Register, r17); +REGISTER_DEFINITION(Register, r18); +REGISTER_DEFINITION(Register, r19); +REGISTER_DEFINITION(Register, r20); +REGISTER_DEFINITION(Register, r21); +REGISTER_DEFINITION(Register, r22); +REGISTER_DEFINITION(Register, r23); +REGISTER_DEFINITION(Register, r24); +REGISTER_DEFINITION(Register, r25); +REGISTER_DEFINITION(Register, r26); +REGISTER_DEFINITION(Register, r27); +REGISTER_DEFINITION(Register, r28); +REGISTER_DEFINITION(Register, r29); +REGISTER_DEFINITION(Register, r30); +REGISTER_DEFINITION(Register, r31); + +REGISTER_DEFINITION(FloatRegister, fnoreg); +REGISTER_DEFINITION(FloatRegister, f0); +REGISTER_DEFINITION(FloatRegister, f1); +REGISTER_DEFINITION(FloatRegister, f2); +REGISTER_DEFINITION(FloatRegister, f3); +REGISTER_DEFINITION(FloatRegister, f4); +REGISTER_DEFINITION(FloatRegister, f5); +REGISTER_DEFINITION(FloatRegister, f6); +REGISTER_DEFINITION(FloatRegister, f7); +REGISTER_DEFINITION(FloatRegister, f8); +REGISTER_DEFINITION(FloatRegister, f9); +REGISTER_DEFINITION(FloatRegister, f10); +REGISTER_DEFINITION(FloatRegister, f11); +REGISTER_DEFINITION(FloatRegister, f12); +REGISTER_DEFINITION(FloatRegister, f13); +REGISTER_DEFINITION(FloatRegister, f14); +REGISTER_DEFINITION(FloatRegister, f15); +REGISTER_DEFINITION(FloatRegister, f16); +REGISTER_DEFINITION(FloatRegister, f17); +REGISTER_DEFINITION(FloatRegister, f18); +REGISTER_DEFINITION(FloatRegister, f19); +REGISTER_DEFINITION(FloatRegister, f20); +REGISTER_DEFINITION(FloatRegister, f21); +REGISTER_DEFINITION(FloatRegister, f22); +REGISTER_DEFINITION(FloatRegister, f23); +REGISTER_DEFINITION(FloatRegister, f24); +REGISTER_DEFINITION(FloatRegister, f25); +REGISTER_DEFINITION(FloatRegister, f26); +REGISTER_DEFINITION(FloatRegister, f27); +REGISTER_DEFINITION(FloatRegister, f28); +REGISTER_DEFINITION(FloatRegister, f29); 
+REGISTER_DEFINITION(FloatRegister, f30); +REGISTER_DEFINITION(FloatRegister, f31); diff --git a/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp new file mode 100644 index 00000000000..3104cd1cc5d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/register_loongarch.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "register_loongarch.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + + 2 * FloatRegisterImpl::number_of_registers; + + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "zero", "ra", "tp", "sp", "a0/v0", "a1/v1", "a2", "a3", + "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", + "t4", "t5", "t6", "t7", "t8", "x", "fp", "s0", + "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", + }; + return is_valid() ? names[encoding()] : "fnoreg"; +} + +const char* ConditionalFlagRegisterImpl::name() const { + const char* names[number_of_registers] = { + "fcc0", "fcc1", "fcc2", "fcc3", "fcc4", "fcc5", "fcc6", "fcc7", + }; + return is_valid() ? names[encoding()] : "fccnoreg"; +} diff --git a/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp new file mode 100644 index 00000000000..37b39f9129f --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/register_loongarch.hpp @@ -0,0 +1,436 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP + +#include "asm/register.hpp" +#include "vm_version_loongarch.hpp" + +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + + +// The implementation of integer registers for the LoongArch architecture +inline Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + max_slots_per_register = 2 + }; + + // derived registers, offsets, and addresses + Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend Register as_Register(int encoding); + + VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(),err_msg( "invalid register (%d)", (int)(intptr_t)this)); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; +}; + + +// The integer registers of the LoongArch architecture +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + +CONSTANT_REGISTER_DECLARATION(Register, r0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, r1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, r2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, r3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, r4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, r5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, r6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, r7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, r8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, r9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, r10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, r11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, r12, (12)); +CONSTANT_REGISTER_DECLARATION(Register, r13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, r14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, r15, (15)); +CONSTANT_REGISTER_DECLARATION(Register, r16, (16)); +CONSTANT_REGISTER_DECLARATION(Register, r17, (17)); +CONSTANT_REGISTER_DECLARATION(Register, r18, (18)); +CONSTANT_REGISTER_DECLARATION(Register, r19, (19)); +CONSTANT_REGISTER_DECLARATION(Register, r20, (20)); +CONSTANT_REGISTER_DECLARATION(Register, r21, (21)); +CONSTANT_REGISTER_DECLARATION(Register, r22, (22)); +CONSTANT_REGISTER_DECLARATION(Register, r23, (23)); +CONSTANT_REGISTER_DECLARATION(Register, r24, (24)); +CONSTANT_REGISTER_DECLARATION(Register, r25, (25)); +CONSTANT_REGISTER_DECLARATION(Register, r26, (26)); +CONSTANT_REGISTER_DECLARATION(Register, r27, (27)); +CONSTANT_REGISTER_DECLARATION(Register, r28, (28)); +CONSTANT_REGISTER_DECLARATION(Register, r29, (29)); +CONSTANT_REGISTER_DECLARATION(Register, r30, 
(30)); +CONSTANT_REGISTER_DECLARATION(Register, r31, (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define NOREG ((Register)(noreg_RegisterEnumValue)) + +#define R0 ((Register)(r0_RegisterEnumValue)) +#define R1 ((Register)(r1_RegisterEnumValue)) +#define R2 ((Register)(r2_RegisterEnumValue)) +#define R3 ((Register)(r3_RegisterEnumValue)) +#define R4 ((Register)(r4_RegisterEnumValue)) +#define R5 ((Register)(r5_RegisterEnumValue)) +#define R6 ((Register)(r6_RegisterEnumValue)) +#define R7 ((Register)(r7_RegisterEnumValue)) +#define R8 ((Register)(r8_RegisterEnumValue)) +#define R9 ((Register)(r9_RegisterEnumValue)) +#define R10 ((Register)(r10_RegisterEnumValue)) +#define R11 ((Register)(r11_RegisterEnumValue)) +#define R12 ((Register)(r12_RegisterEnumValue)) +#define R13 ((Register)(r13_RegisterEnumValue)) +#define R14 ((Register)(r14_RegisterEnumValue)) +#define R15 ((Register)(r15_RegisterEnumValue)) +#define R16 ((Register)(r16_RegisterEnumValue)) +#define R17 ((Register)(r17_RegisterEnumValue)) +#define R18 ((Register)(r18_RegisterEnumValue)) +#define R19 ((Register)(r19_RegisterEnumValue)) +#define R20 ((Register)(r20_RegisterEnumValue)) +#define R21 ((Register)(r21_RegisterEnumValue)) +#define R22 ((Register)(r22_RegisterEnumValue)) +#define R23 ((Register)(r23_RegisterEnumValue)) +#define R24 ((Register)(r24_RegisterEnumValue)) +#define R25 ((Register)(r25_RegisterEnumValue)) +#define R26 ((Register)(r26_RegisterEnumValue)) +#define R27 ((Register)(r27_RegisterEnumValue)) +#define R28 ((Register)(r28_RegisterEnumValue)) +#define R29 ((Register)(r29_RegisterEnumValue)) +#define R30 ((Register)(r30_RegisterEnumValue)) +#define R31 ((Register)(r31_RegisterEnumValue)) + + +#define RA R1 +#define TP R2 +#define SP R3 +#define RA0 R4 +#define RA1 R5 +#define RA2 R6 +#define RA3 R7 +#define RA4 R8 +#define RA5 R9 +#define RA6 R10 +#define RA7 R11 +#define RT0 R12 +#define RT1 R13 +#define RT2 R14 +#define RT3 R15 +#define RT4 R16 +#define RT5 R17 +#define RT6 R18 +#define RT7 R19 +#define RT8 R20 +#define RX R21 +#define FP R22 +#define S0 R23 +#define S1 R24 +#define S2 R25 +#define S3 R26 +#define S4 R27 +#define S5 R28 +#define S6 R29 +#define S7 R30 +#define S8 R31 + +#define c_rarg0 RT0 +#define c_rarg1 RT1 +#define Rmethod S3 +#define Rsender S4 +#define Rnext S1 + +#define V0 RA0 +#define V1 RA1 + +#define SCR1 RT7 +#define SCR2 RT4 + +//for interpreter frame +// bytecode pointer register +#define BCP S0 +// local variable pointer register +#define LVP S7 +// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM +// be sure to save and restore its value in call_stub +#define TSR S2 + +//OPT_SAFEPOINT not supported yet +#define OPT_SAFEPOINT 1 + +#define OPT_THREAD 1 + +#define TREG S6 + +#define S5_heapbase S5 + +#define FSR V0 +#define SSR T6 +#define FSF FV0 + +#define RECEIVER T0 +#define IC_Klass T1 + +#define SHIFT_count T3 + +// ---------- Scratch Register ---------- +#define AT RT7 +#define fscratch F23 + +#endif // DONT_USE_REGISTER_DEFINES + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the LoongArch architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32, + save_slots_per_register = 2, + slots_per_lsx_register = 4, + slots_per_lasx_register = 8, + 
max_slots_per_register = 8 + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) +#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) +#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) +#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) +#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) +#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) +#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) +#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) +#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) +#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) +#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) +#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) +#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) +#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) +#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) +#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) +#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) +#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) 
+#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) +#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) +#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) +#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) +#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) +#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) +#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) +#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) +#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) +#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) +#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) +#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) +#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) +#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) +#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) + +#define FA0 F0 +#define FA1 F1 +#define FA2 F2 +#define FA3 F3 +#define FA4 F4 +#define FA5 F5 +#define FA6 F6 +#define FA7 F7 + +#define FV0 F0 +#define FV1 F1 + +#define FT0 F8 +#define FT1 F9 +#define FT2 F10 +#define FT3 F11 +#define FT4 F12 +#define FT5 F13 +#define FT6 F14 +#define FT7 F15 +#define FT8 F16 +#define FT9 F17 +#define FT10 F18 +#define FT11 F19 +#define FT12 F20 +#define FT13 F21 +#define FT14 F22 +#define FT15 F23 + +#define FS0 F24 +#define FS1 F25 +#define FS2 F26 +#define FS3 F27 +#define FS4 F28 +#define FS5 F29 +#define FS6 F30 +#define FS7 F31 + +#endif // DONT_USE_REGISTER_DEFINES + +// Use ConditionalFlagRegister as shortcut +class ConditionalFlagRegisterImpl; +typedef ConditionalFlagRegisterImpl* ConditionalFlagRegister; + +inline ConditionalFlagRegister as_ConditionalFlagRegister(int encoding) { + return (ConditionalFlagRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the LoongArch architecture +class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { + public: + enum { +// conditionalflag_arg_base = 12, + number_of_registers = 8 + }; + + // construction + inline friend ConditionalFlagRegister as_ConditionalFlagRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + ConditionalFlagRegister successor() const { return as_ConditionalFlagRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fccnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(ConditionalFlagRegister, fcc7 , ( 7)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define FCCNOREG ((ConditionalFlagRegister)(fccnoreg_ConditionalFlagRegisterEnumValue)) +#define FCC0 ((ConditionalFlagRegister)( fcc0_ConditionalFlagRegisterEnumValue)) +#define FCC1 ((ConditionalFlagRegister)( fcc1_ConditionalFlagRegisterEnumValue)) +#define FCC2 ((ConditionalFlagRegister)( fcc2_ConditionalFlagRegisterEnumValue)) 
+#define FCC3 ((ConditionalFlagRegister)( fcc3_ConditionalFlagRegisterEnumValue))
+#define FCC4 ((ConditionalFlagRegister)( fcc4_ConditionalFlagRegisterEnumValue))
+#define FCC5 ((ConditionalFlagRegister)( fcc5_ConditionalFlagRegisterEnumValue))
+#define FCC6 ((ConditionalFlagRegister)( fcc6_ConditionalFlagRegisterEnumValue))
+#define FCC7 ((ConditionalFlagRegister)( fcc7_ConditionalFlagRegisterEnumValue))
+
+#endif // DONT_USE_REGISTER_DEFINES
+
+// Need to know the total number of registers of all sorts for SharedInfo.
+// Define a class that exports it.
+class ConcreteRegisterImpl : public AbstractRegisterImpl {
+ public:
+  enum {
+    // A big enough number for C2: all the registers plus flags
+    // This number must be large enough to cover REG_COUNT (defined by c2) registers.
+    // There is no requirement that any ordering here matches any ordering c2 gives
+    // its optoregs.
+    number_of_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
+                          FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers
+  };
+
+  static const int max_gpr;
+  static const int max_fpr;
+};
+
+#endif //CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP
diff --git a/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp
new file mode 100644
index 00000000000..bf4498dc62c
--- /dev/null
+++ b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/relocInfo.hpp" +#include "compiler/disassembler.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/safepoint.hpp" + + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + x += o; + typedef Assembler::WhichOperand WhichOperand; + WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop + assert(which == Assembler::disp32_operand || + which == Assembler::narrow_oop_operand || + which == Assembler::imm_operand, "format unpacks ok"); + if (which == Assembler::imm_operand) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); + } + } else if (which == Assembler::narrow_oop_operand) { + // both compressed oops and compressed classes look the same + if (Universe::heap()->is_in_reserved((oop)x)) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)oopDesc::encode_heap_oop((oop)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(oopDesc::encode_heap_oop((oop)x)), (intptr_t)(x)); + } + } else { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); + } + } + } else { + // Note: Use runtime_call_type relocations for call32_operand. + assert(0, "call32_operand not supported in LoongArch64"); + } +} + + +address Relocation::pd_call_destination(address orig_addr) { + NativeInstruction* ni = nativeInstruction_at(addr()); + if (ni->is_far_call()) { + return nativeFarCall_at(addr())->destination(orig_addr); + } else if (ni->is_call()) { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + return nativeCallTrampolineStub_at(trampoline)->destination(); + } else { + address new_addr = nativeCall_at(addr())->target_addr_for_bl(orig_addr); + // If call is branch to self, don't try to relocate it, just leave it + // as branch to self. This happens during code generation if the code + // buffer expands. It will be relocated to the trampoline above once + // code generation is complete. + return (new_addr == orig_addr) ? 
addr() : new_addr; + } + } else if (ni->is_jump()) { + return nativeGeneralJump_at(addr())->jump_destination(orig_addr); + } else { + tty->print_cr("\nError!\ncall destination: 0x%lx", p2i(addr())); + Disassembler::decode(addr() - 10 * BytesPerInstWord, addr() + 10 * BytesPerInstWord, tty); + ShouldNotReachHere(); + return NULL; + } +} + +void Relocation::pd_set_call_destination(address x) { + NativeInstruction* ni = nativeInstruction_at(addr()); + if (ni->is_far_call()) { + nativeFarCall_at(addr())->set_destination(x); + } else if (ni->is_call()) { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + nativeCall_at(addr())->set_destination_mt_safe(x, false); + } else { + nativeCall_at(addr())->set_destination(x); + } + } else if (ni->is_jump()) { + nativeGeneralJump_at(addr())->set_jump_destination(x); + } else { + ShouldNotReachHere(); + } +} + +address* Relocation::pd_address_in_code() { + return (address*)addr(); +} + +address Relocation::pd_get_address_from_code() { + NativeMovConstReg* ni = nativeMovConstReg_at(addr()); + return (address)ni->data(); +} + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void metadata_Relocation::pd_fix_value(address x) { +} diff --git a/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp new file mode 100644 index 00000000000..211242f3fb1 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/relocInfo_loongarch.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Since LoongArch instructions are whole words, + // the two low-order offset bits can always be discarded. + offset_unit = 4, + + // imm_oop_operand vs. narrow_oop_operand + format_width = 2 + }; + +#endif // CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp new file mode 100644 index 00000000000..e6ee65f3672 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/runtime_loongarch_64.cpp @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "classfile/systemDictionary.hpp" +#include "code/vmreg.hpp" +#include "interpreter/interpreter.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_loongarch.inline.hpp" +#endif + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +//-------------- generate_exception_blob ----------- +// creates _exception_blob. +// The exception blob is jumped to from a compiled method. +// (see emit_exception_handler in sparc.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jump, and left with a jump. +// +// Arguments: +// V0: exception oop +// V1: exception pc +// +// Results: +// A0: exception oop +// A1: exception pc in caller or ??? +// jumps to: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// +// [stubGenerator_loongarch_64.cpp] generate_forward_exception() +// |- V0, V1 are created +// |- T4 <= SharedRuntime::exception_handler_for_return_address +// `- jr T4 +// `- the caller's exception_handler +// `- jr OptoRuntime::exception_blob +// `- here +// +void OptoRuntime::generate_exception_blob() { + // Capture info about frame layout + enum layout { + fp_off, + return_off, // slot for return address + framesize + }; + + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer("exception_blob", 5120, 5120); + MacroAssembler* masm = new MacroAssembler(&buffer); + + address start = __ pc(); + + __ addi_d(SP, SP, -1 * framesize * wordSize); // Prolog! + + // this frame will be treated as the original caller method. 
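+  // A sketch of the two-word frame just built by the addi_d above (framesize == 2):
+  //
+  //   SP + return_off * wordSize (+8)  ->  return pc slot -- filled with V1 below
+  //   SP + fp_off * wordSize     (+0)  ->  saved FP       -- filled just after
+  //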
+ // So, the return pc should be filled with the original exception pc. + // ref: X86's implementation + __ st_d(V1, SP, return_off * wordSize); // return address + __ st_d(FP, SP, fp_off * wordSize); + + // Save callee saved registers. None for UseSSE=0, + // floats-only for UseSSE=1, and doubles for UseSSE=2. + + __ addi_d(FP, SP, fp_off * wordSize); + + // Store exception in Thread object. We cannot pass any arguments to the + // handle_exception call, since we do not want to make any assumption + // about the size of the frame where the exception happened in. + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ st_d(V0, Address(thread, JavaThread::exception_oop_offset())); + __ st_d(V1, Address(thread, JavaThread::exception_pc_offset())); + + // This call does all the hard work. It checks if an exception handler + // exists in the method. + // If so, it returns the handler address. + // If not, it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. + Label L; + address the_pc = __ pc(); + __ bind(L); + __ set_last_Java_frame(thread, NOREG, NOREG, L); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ move(A0, thread); + // TODO: confirm reloc + __ call((address)OptoRuntime::handle_exception_C, relocInfo::runtime_call_type); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + + oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(thread, true); + + // Pop self-frame. + __ leave(); // Epilog! + + // V0: exception handler + + // We have a handler in V0, (could be deopt blob) + __ move(T4, V0); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // Get the exception + __ ld_d(A0, Address(thread, JavaThread::exception_oop_offset())); + // Get the exception pc in case we are deoptimized + __ ld_d(A1, Address(thread, JavaThread::exception_pc_offset())); +#ifdef ASSERT + __ st_d(R0, Address(thread, JavaThread::exception_handler_pc_offset())); + __ st_d(R0, Address(thread, JavaThread::exception_pc_offset())); +#endif + // Clear the exception oop so GC no longer processes it as a root. + __ st_d(R0, Address(thread, JavaThread::exception_oop_offset())); + + // Fix seg fault when running: + // Eclipse + Plugin + Debug As + // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() + // + __ move(V0, A0); + __ move(V1, A1); + + // V0: exception oop + // T4: exception handler + // A1: exception pc + __ jr(T4); + + // make sure all code is generated + masm->flush(); + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); +} diff --git a/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp new file mode 100644 index 00000000000..36786b53bd4 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/sharedRuntime_loongarch_64.cpp @@ -0,0 +1,3453 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/compiledICHolder.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_loongarch.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +#include + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + +class RegisterSaver { + // Capture info about frame layout + enum layout { + fpr0_off = 0, + fpr1_off, + fpr2_off, + fpr3_off, + fpr4_off, + fpr5_off, + fpr6_off, + fpr7_off, + fpr8_off, + fpr9_off, + fpr10_off, + fpr11_off, + fpr12_off, + fpr13_off, + fpr14_off, + fpr15_off, + fpr16_off, + fpr17_off, + fpr18_off, + fpr19_off, + fpr20_off, + fpr21_off, + fpr22_off, + fpr23_off, + fpr24_off, + fpr25_off, + fpr26_off, + fpr27_off, + fpr28_off, + fpr29_off, + fpr30_off, + fpr31_off, + a0_off, + a1_off, + a2_off, + a3_off, + a4_off, + a5_off, + a6_off, + a7_off, + t0_off, + t1_off, + t2_off, + t3_off, + t4_off, + t5_off, + t6_off, + t7_off, + t8_off, + s0_off, + s1_off, + s2_off, + s3_off, + s4_off, + s5_off, + s6_off, + s7_off, + s8_off, + fp_off, + ra_off, + fpr_size = fpr31_off - fpr0_off + 1, + gpr_size = ra_off - a0_off + 1, + }; + + const bool _save_vectors; + public: + RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {} + + OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words); + void restore_live_registers(MacroAssembler* masm); + + int slots_save() { + int slots = gpr_size * VMRegImpl::slots_per_word; + + if (_save_vectors && UseLASX) + slots += FloatRegisterImpl::slots_per_lasx_register * fpr_size; + else if (_save_vectors && UseLSX) + slots += FloatRegisterImpl::slots_per_lsx_register * fpr_size; + else + slots += FloatRegisterImpl::save_slots_per_register * fpr_size; + + return slots; + } + + int gpr_offset(int off) { + int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; + int slots_per_gpr = VMRegImpl::slots_per_word; + + if (_save_vectors && UseLASX) + slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; + else if (_save_vectors && UseLSX) + slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; + + return (fpr_size * slots_per_fpr + (off - a0_off) * slots_per_gpr) * VMRegImpl::stack_slot_size; + 
} + + int fpr_offset(int off) { + int slots_per_fpr = FloatRegisterImpl::save_slots_per_register; + + if (_save_vectors && UseLASX) + slots_per_fpr = FloatRegisterImpl::slots_per_lasx_register; + else if (_save_vectors && UseLSX) + slots_per_fpr = FloatRegisterImpl::slots_per_lsx_register; + + return off * slots_per_fpr * VMRegImpl::stack_slot_size; + } + + int ra_offset() { return gpr_offset(ra_off); } + int t5_offset() { return gpr_offset(t5_off); } + int s3_offset() { return gpr_offset(s3_off); } + int v0_offset() { return gpr_offset(a0_off); } + int v1_offset() { return gpr_offset(a1_off); } + + int fpr0_offset() { return fpr_offset(fpr0_off); } + int fpr1_offset() { return fpr_offset(fpr1_off); } + + // During deoptimization only the result register need to be restored + // all the other values have already been extracted. + void restore_result_registers(MacroAssembler* masm); +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) { + + // Always make the frame size 16-byte aligned + int frame_size_in_bytes = round_to(additional_frame_words * wordSize + slots_save() * VMRegImpl::stack_slot_size, StackAlignmentInBytes); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / VMRegImpl::stack_slot_size; + // The caller will allocate additional_frame_words + int additional_frame_slots = additional_frame_words * wordSize / VMRegImpl::stack_slot_size; + // CodeBlob frame size is in words. + int frame_size_in_words = frame_size_in_bytes / wordSize; + + *total_frame_words = frame_size_in_words; + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap(frame_size_in_slots, 0); + + // save registers + __ addi_d(SP, SP, -slots_save() * VMRegImpl::stack_slot_size); + + for (int i = 0; i < fpr_size; i++) { + FloatRegister fpr = as_FloatRegister(i); + int off = fpr_offset(i); + + if (_save_vectors && UseLASX) + __ xvst(fpr, SP, off); + else if (_save_vectors && UseLSX) + __ vst(fpr, SP, off); + else + __ fst_d(fpr, SP, off); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), fpr->as_VMReg()); + } + + for (int i = a0_off; i <= a7_off; i++) { + Register gpr = as_Register(A0->encoding() + (i - a0_off)); + int off = gpr_offset(i); + + __ st_d(gpr, SP, gpr_offset(i)); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); + } + + for (int i = t0_off; i <= t6_off; i++) { + Register gpr = as_Register(T0->encoding() + (i - t0_off)); + int off = gpr_offset(i); + + __ st_d(gpr, SP, gpr_offset(i)); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); + } + __ st_d(T8, SP, gpr_offset(t8_off)); + map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(t8_off) / VMRegImpl::stack_slot_size + additional_frame_slots), T8->as_VMReg()); + + for (int i = s0_off; i <= s8_off; i++) { + Register gpr = as_Register(S0->encoding() + (i - s0_off)); + int off = gpr_offset(i); + + __ st_d(gpr, SP, gpr_offset(i)); + map->set_callee_saved(VMRegImpl::stack2reg(off / VMRegImpl::stack_slot_size + additional_frame_slots), gpr->as_VMReg()); + } + + __ st_d(FP, SP, gpr_offset(fp_off)); + map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(fp_off) / VMRegImpl::stack_slot_size + additional_frame_slots), FP->as_VMReg()); + __ st_d(RA, SP, gpr_offset(ra_off)); + 
map->set_callee_saved(VMRegImpl::stack2reg(gpr_offset(ra_off) / VMRegImpl::stack_slot_size + additional_frame_slots), RA->as_VMReg()); + + __ addi_d(FP, SP, gpr_offset(fp_off)); + + return map; +} + + +// Pop the current frame and restore all the registers that we +// saved. +void RegisterSaver::restore_live_registers(MacroAssembler* masm) { + for (int i = 0; i < fpr_size; i++) { + FloatRegister fpr = as_FloatRegister(i); + int off = fpr_offset(i); + + if (_save_vectors && UseLASX) + __ xvld(fpr, SP, off); + else if (_save_vectors && UseLSX) + __ vld(fpr, SP, off); + else + __ fld_d(fpr, SP, off); + } + + for (int i = a0_off; i <= a7_off; i++) { + Register gpr = as_Register(A0->encoding() + (i - a0_off)); + int off = gpr_offset(i); + + __ ld_d(gpr, SP, gpr_offset(i)); + } + + for (int i = t0_off; i <= t6_off; i++) { + Register gpr = as_Register(T0->encoding() + (i - t0_off)); + int off = gpr_offset(i); + + __ ld_d(gpr, SP, gpr_offset(i)); + } + __ ld_d(T8, SP, gpr_offset(t8_off)); + + for (int i = s0_off; i <= s8_off; i++) { + Register gpr = as_Register(S0->encoding() + (i - s0_off)); + int off = gpr_offset(i); + + __ ld_d(gpr, SP, gpr_offset(i)); + } + + __ ld_d(FP, SP, gpr_offset(fp_off)); + __ ld_d(RA, SP, gpr_offset(ra_off)); + + __ addi_d(SP, SP, slots_save() * VMRegImpl::stack_slot_size); +} + +// Pop the current frame and restore the registers that might be holding +// a result. +void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + // Just restore result register. Only used by deoptimization. By + // now any callee save register that needs to be restore to a c2 + // caller of the deoptee has been extracted into the vframeArray + // and will be stuffed into the c2i adapter we create for later + // restoration so only result registers need to be restored here. + + __ ld_d(V0, SP, gpr_offset(a0_off)); + __ ld_d(V1, SP, gpr_offset(a1_off)); + + __ fld_d(F0, SP, fpr_offset(fpr0_off)); + __ fld_d(F1, SP, fpr_offset(fpr1_off)); + + __ addi_d(SP, SP, gpr_offset(ra_off)); +} + +// Is vector's size (in bytes) bigger than a size saved by default? +// 8 bytes registers are saved by default using fld/fst instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 8; +} + +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. + +static int reg2offset_in(VMReg r) { + // Account for saved fp and return address + // This should really be in_preserve_stack_slots + return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); +} + +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than SharedInfo::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register +// up to RegisterImpl::number_of_registers) are the 32-bit +// integer registers. 
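+
+// As a worked example of the convention implemented by java_calling_convention
+// below (a sketch only -- the receiver, when present, is simply the first
+// T_OBJECT in sig_bt), a signature (Object, long, double, int) is assigned:
+//
+//   Object -> T0    (1st oop/int slot)
+//   long   -> A0    (2nd oop/int slot, set2)
+//   double -> FA0   (1st float slot, set2)
+//   int    -> A1    (3rd oop/int slot, set1)
+//
+// Once T0, A0-A7 (or FA0-FA7 for float/double) are used up, values fall back to
+// 4-byte stack-slot pairs, i.e. stk_args advances by 2 per value.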
+ +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +// Note: the INPUTS in sig_bt are in units of Java argument words, which are +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit +// units regardless of build. + + +// --------------------------------------------------------------------------- +// The compiled Java calling convention. +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + + // Create the mapping between argument positions and registers. + static const Register INT_ArgReg[Argument::n_register_parameters + 1] = { + T0, A0, A1, A2, A3, A4, A5, A6, A7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 + }; + + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_register_parameters + 1) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (int_args < Argument::n_register_parameters + 1) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { + Label L; + __ verify_oop(Rmethod); + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, L); + // Schedule the branch target address early. 
+ // Call into the VM to patch the caller, then jump to compiled callee + // T5 isn't live so capture return address while we easily can + __ move(T5, RA); + + __ pushad(); +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // VM needs caller's callsite + // VM needs target method + + __ move(A0, Rmethod); + __ move(A1, T5); + // we should preserve the return address + __ verify_oop(Rmethod); + __ move(S0, SP); + __ li(AT, -(StackAlignmentInBytes)); // align the stack + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), + relocInfo::runtime_call_type); + + __ move(SP, S0); + __ popad(); + __ bind(L); +} + +static void gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup) { + + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + // However we will run interpreted if we come thru here. The next pass + // thru the call site will run compiled. If we ran compiled here then + // we can (theorectically) do endless i2c->c2i->i2c transitions during + // deopt/uncommon trap cycles. If we always go interpreted here then + // we can have at most one and don't need to play any tricks to keep + // from endlessly growing the stack. + // + // Actually if we detected that we had an i2c->c2i transition here we + // ought to be able to reset the world back to the state of the interpreted + // call and not bother building another interpreter arg area. We don't + // do that at this point. + + patch_callers_callsite(masm); + __ bind(skip_fixup); + +#ifdef COMPILER2 + __ empty_FPU_stack(); +#endif + //this is for native ? + // Since all args are passed on the stack, total_args_passed * interpreter_ + // stack_element_size is the + // space we need. + int extraspace = total_args_passed * Interpreter::stackElementSize; + + // stack is aligned, keep it that way + extraspace = round_to(extraspace, 2*wordSize); + + // Get return address + __ move(T5, RA); + // set senderSP value + //refer to interpreter_loongarch.cpp:generate_asm_entry + __ move(Rsender, SP); + __ addi_d(SP, SP, -extraspace); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // st_off points to lowest address on stack. 
+ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; + // Say 4 args: + // i st_off + // 0 12 T_LONG + // 1 8 T_VOID + // 2 4 T_OBJECT + // 3 0 T_BOOL + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use fpu stack top + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + __ ld_ptr(AT, Address(SP, ld_off)); + __ st_ptr(AT, Address(SP, st_off)); + + } else { + + + int next_off = st_off - Interpreter::stackElementSize; + __ ld_ptr(AT, Address(SP, ld_off)); + __ st_ptr(AT, Address(SP, st_off)); + + // Ref to is_Register condition + if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ st_ptr(AT, SP, st_off - 8); + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + __ st_d(r, SP, st_off); + } else { + //FIXME, LA will not enter here + // long/double in gpr + __ st_d(r, SP, st_off); + // In [java/util/zip/ZipFile.java] + // + // private static native long open(String name, int mode, long lastModified); + // private static native int getTotal(long jzfile); + // + // We need to transfer T_LONG paramenters from a compiled method to a native method. + // It's a complex process: + // + // Caller -> lir_static_call -> gen_resolve_stub + // -> -- resolve_static_call_C + // `- gen_c2i_adapter() [*] + // | + // `- AdapterHandlerLibrary::get_create_apapter_index + // -> generate_native_entry + // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] + // + // In [**], T_Long parameter is stored in stack as: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // However, the sequence is reversed here: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | | + // (low) + // + // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). + // + if (sig_bt[i] == T_LONG) + __ st_d(r, SP, st_off - 8); + } + } else if (r_1->is_FloatRegister()) { + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ fst_s(fr, SP, st_off); + else { + __ fst_d(fr, SP, st_off); + __ fst_d(fr, SP, st_off - 8); // T_DOUBLE needs two slots + } + } + } + + // Schedule the branch target address early. + __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); + // And repush original return address + __ move(RA, T5); + __ jr (AT); +} + +static void gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + + // Generate an I2C adapter: adjust the I-frame to make space for the C-frame + // layout. Lesp was saved by the calling I-frame and will be restored on + // return. Meanwhile, outgoing arg space is all owned by the callee + // C-frame, so we can mangle it at will. After adjusting the frame size, + // hoist register arguments and repack other args according to the compiled + // code convention. Finally, end in a jump to the compiled code. The entry + // point address is the start of the buffer. + + // We will only enter here from an interpreted frame and never from after + // passing thru a c2i. 
Azul allowed this but we do not. If we lose the + // race and use a c2i we will remain interpreted for the race loser(s). + // This removes all sorts of headaches on the LA side and also eliminates + // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. + + __ move(T4, SP); + + // Cut-out for having no stack args. Since up to 2 int/oop args are passed + // in registers, we will occasionally have no stack args. + int comp_words_on_stack = 0; + if (comp_args_on_stack) { + // Sig words on the stack are greater-than VMRegImpl::stack0. Those in + // registers are below. By subtracting stack0, we either get a negative + // number (all values in registers) or the maximum stack slot accessed. + // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); + // Convert 4-byte stack slots to words. + // did LA need round? FIXME + comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; + // Round up to miminum stack alignment, in wordSize + comp_words_on_stack = round_to(comp_words_on_stack, 2); + __ addi_d(SP, SP, -comp_words_on_stack * wordSize); + } + + // Align the outgoing SP + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + // push the return address on the stack (note that pushing, rather + // than storing it, yields the correct frame alignment for the callee) + // Put saved SP in another register + const Register saved_sp = T5; + __ move(saved_sp, T4); + + + // Will jump to the compiled code just as if compiled code was doing it. + // Pre-load the register-jump target early, to schedule it better. + __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); + + // Now generate the shuffle code. Pick up all register args and move the + // rest through the floating point stack top. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from SP+offset. + + assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; + // Point to interpreter value (vs. tag) + int next_off = ld_off - Interpreter::stackElementSize; + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to + // account for return address ) + // NOTICE HERE!!!! I sub a wordSize here + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + //+ wordSize; + + if (!r_2->is_valid()) { + __ ld_d(AT, saved_sp, ld_off); + __ st_d(AT, SP, st_off); + } else { + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + // st_off is LSW (i.e. reg.first()) + + // [./org/eclipse/swt/graphics/GC.java] + // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, + // int destX, int destY, int destWidth, int destHeight, + // boolean simple, + // int imgWidth, int imgHeight, + // long maskPixmap, <-- Pass T_LONG in stack + // int maskType); + // Before this modification, Eclipse displays icons with solid black background. 
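+          // For a two-slot value the interpreter keeps the payload in the
+          // lower-addressed slot and a dummy T_VOID slot above it, so the
+          // conditional reload below fetches from (ld_off - 8) before AT is
+          // stored into the compiled frame.  Illustrative sketch only; the
+          // helper name is hypothetical and not part of this file:
+          //
+          //   int interpreter_payload_off(BasicType bt, int ld_off) {
+          //     return (bt == T_LONG || bt == T_DOUBLE) ? ld_off - 8 : ld_off;
+          //   }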
+ // + __ ld_d(AT, saved_sp, ld_off); + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ ld_d(AT, saved_sp, ld_off - 8); + __ st_d(AT, SP, st_off); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + if (r_2->is_valid()) { + // Remember r_1 is low address (and LSB on LA) + // So r_2 gets loaded from high address regardless of the platform + assert(r_2->as_Register() == r_1->as_Register(), ""); + __ ld_d(r, saved_sp, ld_off); + + // + // For T_LONG type, the real layout is as below: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // We should load the low-8 bytes. + // + if (sig_bt[i] == T_LONG) + __ ld_d(r, saved_sp, ld_off - 8); + } else { + __ ld_w(r, saved_sp, ld_off); + } + } else if (r_1->is_FloatRegister()) { // Float Register + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ fld_s(fr, saved_sp, ld_off); + else { + __ fld_d(fr, saved_sp, ld_off); + __ fld_d(fr, saved_sp, ld_off - 8); + } + } + } + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. + __ get_thread(T8); + __ st_d(Rmethod, T8, in_bytes(JavaThread::callee_target_offset())); + + // move methodOop to T5 in case we end up in an c2i adapter. + // the c2i adapters expect methodOop in T5 (c2) because c2's + // resolve stubs return the result (the method) in T5. + // I'd love to fix this. + __ move(T5, Rmethod); + __ jr(T4); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry = __ pc(); + + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know G5 holds the methodOop. The + // args start out packed in the compiled layout. They need to be unpacked + // into the interpreter layout. This will almost always require some stack + // space. We grow the current (compiled) stack, then repack the args. We + // finally end in a jump to the generic interpreter entry point. On exit + // from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relys solely on SP and not FP, get sick). 
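+  // Rough layout of the adapter blob emitted by this function (sketch, for
+  // reference):
+  //
+  //   i2c_entry:            gen_i2c_adapter()            (emitted above)
+  //   c2i_unverified_entry: inline-cache check; jumps to the ic_miss stub on
+  //                         a mismatch, otherwise falls through to c2i_entry
+  //                         (or branches straight to skip_fixup when the
+  //                         callee has no compiled code yet)
+  //   c2i_entry:            gen_c2i_adapter() with skip_fixup bound after
+  //                         patch_callers_callsite()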
+ + address c2i_unverified_entry = __ pc(); + Label skip_fixup; + { + Register holder = T1; + Register receiver = T0; + Register temp = T8; + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + Label missed; + + __ verify_oop(holder); + //add for compressedoops + __ load_klass(temp, receiver); + __ verify_oop(temp); + + __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); + __ bne(AT, temp, missed); + // Method might have been compiled since the call site was patched to + // interpreted if that is the case treat it as a miss so we can get + // the call site corrected. + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, skip_fixup); + __ bind(missed); + + __ jmp(ic_miss, relocInfo::runtime_call_type); + } + address c2i_entry = __ pc(); + + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on LA"); + // Return the number of VMReg stack_slots needed for the args. + // This value does not include an abi space (like register window + // save area). + + // We return the amount of VMReg stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. Since we always + // have space for storing at least 6 registers to memory we start with that. + // See int_stk_helper for a further discussion. + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. 
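+  // On LoongArch64 the first eight integer/pointer arguments travel in the
+  // A-registers and the first eight float/double arguments in the
+  // FA-registers listed below.  A float/double that finds no free FP register
+  // is tried in an integer register next and only then spilled to the stack;
+  // every spilled value costs two 32-bit stack slots (stk_args += 2).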
+ static const Register INT_ArgReg[Argument::n_register_parameters] = { + A0, A1, A2, A3, A4, A5, A6, A7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 + }; + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; // inc by 2 each time + +// Example: +// n java.lang.UNIXProcess::forkAndExec +// private native int forkAndExec(byte[] prog, +// byte[] argBlock, int argc, +// byte[] envBlock, int envc, +// byte[] dir, +// boolean redirectErrorStream, +// FileDescriptor stdin_fd, +// FileDescriptor stdout_fd, +// FileDescriptor stderr_fd) +// JNIEXPORT jint JNICALL +// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, +// jobject process, +// jbyteArray prog, +// jbyteArray argBlock, jint argc, +// jbyteArray envBlock, jint envc, +// jbyteArray dir, +// jboolean redirectErrorStream, +// jobject stdin_fd, +// jobject stdout_fd, +// jobject stderr_fd) +// +// ::c_calling_convention +// 0: // env <-- a0 +// 1: L // klass/obj <-- t0 => a1 +// 2: [ // prog[] <-- a0 => a2 +// 3: [ // argBlock[] <-- a1 => a3 +// 4: I // argc <-- a2 => a4 +// 5: [ // envBlock[] <-- a3 => a5 +// 6: I // envc <-- a4 => a5 +// 7: [ // dir[] <-- a5 => a7 +// 8: Z // redirectErrorStream <-- a6 => sp[0] +// 9: L // stdin <-- a7 => sp[8] +// 10: L // stdout fp[16] => sp[16] +// 11: L // stderr fp[24] => sp[24] +// + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (int_args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + } else if (int_args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg()); + } else if (int_args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// --------------------------------------------------------------------------- +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ fst_s(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ fst_d(FSF, FP, -wordSize ); + break; + case T_VOID: break; + case T_LONG: + __ st_d(V0, FP, -wordSize); 
+ break; + case T_OBJECT: + case T_ARRAY: + __ st_d(V0, FP, -wordSize); + break; + default: { + __ st_w(V0, FP, -wordSize); + } + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ fld_s(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ fld_d(FSF, FP, -wordSize ); + break; + case T_LONG: + __ ld_d(V0, FP, -wordSize); + break; + case T_VOID: break; + case T_OBJECT: + case T_ARRAY: + __ ld_d(V0, FP, -wordSize); + break; + default: { + __ ld_w(V0, FP, -wordSize); + } + } +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + __ push(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ push(args[i].first()->as_FloatRegister()); + } + } +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + __ pop(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ pop(args[i].first()->as_FloatRegister()); + } + } +} + +// A simple move of integer like type +static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ld_w(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else { + // stack to reg + __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + if (dst.first() != src.first()){ + __ move(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// An oop arg. Must pass a handle not the oop itself +static void object_move(MacroAssembler* masm, + OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset) { + + // must pass a handle. 
First figure out the location we use as a handle + + if (src.first()->is_stack()) { + // Oop is already on the stack as an argument + Register rHandle = T5; + Label nil; + __ xorr(rHandle, rHandle, rHandle); + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ beq(AT, R0, nil); + __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); + __ bind(nil); + if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); + else __ move( (dst.first())->as_Register(), rHandle); + + int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + } else { + // Oop is in an a register we must store it to the space we reserve + // on the stack for oop_handles + const Register rOop = src.first()->as_Register(); + assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); + const Register rHandle = T5; + //Important: refer to java_calling_convertion + int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot*VMRegImpl::stack_slot_size; + Label skip; + __ st_d( rOop , SP, offset ); + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + __ xorr( rHandle, rHandle, rHandle); + __ beq(rOop, R0, skip); + __ lea(rHandle, Address(SP, offset)); + __ bind(skip); + // Store the handle parameter + if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); + else __ move((dst.first())->as_Register(), rHandle); + + if (is_receiver) { + *receiver_offset = offset; + } + } +} + +// A float arg may have to do float reg int reg conversion +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); + if (src.first()->is_stack()) { + // stack to stack/reg + if (dst.first()->is_stack()) { + __ ld_w(AT, FP, reg2offset_in(src.first())); + __ st_w(AT, SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fld_s(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); + } else { + __ ld_w(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + // reg to stack/reg + if(dst.first()->is_stack()) { + __ fst_s(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fmov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } else { + __ movfr2gr_s(dst.first()->as_Register(), src.first()->as_FloatRegister()); + } + } +} + +// A long move +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The only legal possibility for a long_move VMRegPair is: + // 1: two stack slots (possibly unaligned) + // as neither the java or C calling convention will use registers + // for longs. 
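+  // Note: on this LP64 port c_calling_convention() above does hand out
+  // A-registers for T_LONG, so the register-to-stack and register-to-register
+  // branches below are reachable as well, not just the stack-to-stack case
+  // described above.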
+ if (src.first()->is_stack()) { + assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); + if( dst.first()->is_stack()){ + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else { + __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + if( dst.first()->is_stack()){ + __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + __ move(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// A double move +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The only legal possibilities for a double_move VMRegPair are: + // The painful thing here is that like long_move a VMRegPair might be + + // Because of the calling convention we know that src is either + // 1: a single physical register (xmm registers only) + // 2: two stack slots (possibly unaligned) + // dst can only be a pair of stack slots. + + if (src.first()->is_stack()) { + // source is all stack + if( dst.first()->is_stack()){ + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fld_d(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); + } else { + __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + // reg to stack/reg + // No worries about stack alignment + if( dst.first()->is_stack()){ + __ fst_d(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + } else if (dst.first()->is_FloatRegister()) { + __ fmov_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } else { + __ movfr2gr_d(dst.first()->as_Register(), src.first()->as_FloatRegister()); + } + } +} + +static void verify_oop_args(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + Register temp_reg = T4; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (sig_bt[i] == T_OBJECT || + sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ld_d(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = S3; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal(err_msg_res("unexpected intrinsic id %d", iid)); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. 
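+    // If the trailing MemberName argument arrived on the stack it is loaded
+    // into S3; if it already sits in a register, that register is simply
+    // adopted as member_reg and no move is emitted.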
+ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ld_d(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. + fatal("receiver always in a register"); + receiver_reg = SSR; // known to be free at this point + __ ld_d(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// convention (handlizes oops, etc), transitions to native, makes the call, +// returns to java state (possibly blocking), unhandlizes any result and +// returns. +nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + methodHandle method, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, + BasicType ret_type) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); + int vep_offset = ((intptr_t)__ pc()) - start; + gen_special_dispatch(masm, + method, + in_sig_bt, + in_regs); + assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, + "valid size for make_non_entrant"); + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period + __ flush(); + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet*)NULL); + } + + bool is_critical_native = true; + address native_func = method->critical_native_function(); + if (native_func == NULL) { + native_func = method->native_function(); + is_critical_native = false; + } + assert(native_func != NULL, "must have function"); + + // Native nmethod wrappers never take possesion of the oop arguments. + // So the caller will gc the arguments. The only thing we need an + // oopMap for is if the call is static + // + // An OopMap for lock (and class if static), and one for the VM call itself + OopMapSet *oop_maps = new OopMapSet(); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. 
To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args; + if (!is_critical_native) { + total_c_args += 1; + if (method->is_static()) { + total_c_args++; + } + } else { + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args++; + } + } + } + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + Symbol* atype = ss.as_symbol(CHECK_NULL); + const char* at = atype->as_C_string(); + if (strlen(at) == 2) { + assert(at[0] == '[', "must be"); + switch (at[1]) { + case 'B': in_elem_bt[i] = T_BYTE; break; + case 'C': in_elem_bt[i] = T_CHAR; break; + case 'D': in_elem_bt[i] = T_DOUBLE; break; + case 'F': in_elem_bt[i] = T_FLOAT; break; + case 'I': in_elem_bt[i] = T_INT; break; + case 'J': in_elem_bt[i] = T_LONG; break; + case 'S': in_elem_bt[i] = T_SHORT; break; + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; + default: ShouldNotReachHere(); + } + } + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + in_elem_bt[i] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type(), "must match"); + ss.next(); + } + } + } + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + // + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Compute framesize for the wrapper. We need to handlize all oops in + // registers. We must create space for them here that is disjoint from + // the windowed save area because we have no control over when we might + // flush the window again and overwrite values that gc has since modified. + // (The live window race) + // + // We always just allocate 6 word for storing down these object. This allow + // us to simply record the base and use the Ireg number to decide which + // slot to use. (Note that the reg number is the inbound number not the + // outbound number). + // We must shuffle args to match the native convention, and include var-args space. + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. 
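+    // The loop below sizes that save area from the signature: each register
+    // argument contributes one slot for 32-bit kinds and two slots for
+    // T_LONG/T_DOUBLE/T_ARRAY, i.e.
+    //
+    //   total_save_slots = double_slots * 2 + single_slots;
+    //
+    // and stack_slots is rounded to an even count whenever a 2-slot value is
+    // present so the doubles stay 8-byte aligned.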
+ int double_slots = 0; + int single_slots = 0; + for ( int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: single_slots++; break; + case T_ARRAY: + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + switch (in_sig_bt[i]) { + case T_FLOAT: single_slots++; break; + case T_DOUBLE: double_slots++; break; + default: ShouldNotReachHere(); + } + } + } + total_save_slots = double_slots * 2 + single_slots; + // align the save area + if (double_slots != 0) { + stack_slots = round_to(stack_slots, 2); + } + } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + // Now a place to save return value or as a temporary for any gpr -> fpr moves + // + 2 for return address (which we own) and saved fp + stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | 2 slots for moves | + // |---------------------| + // | lock box (if sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | vararg area | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, StackAlignmentInSlots); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + intptr_t start = (intptr_t)__ pc(); + + + + // First thing make an ic check to see if we should even be here + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + // We are free to use all registers as temps without saving them and + // restoring them except fp. fp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. + + //refer to register_loongarch.hpp:IC_Klass + const Register ic_reg = T1; + const Register receiver = T0; + + Label hit; + Label exception_pending; + + __ verify_oop(receiver); + //add for compressedoops + __ load_klass(T4, receiver); + __ beq(T4, ic_reg, hit); + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ bind(hit); + + int vep_offset = ((intptr_t)__ pc()) - start; + + // Generate stack overflow check + if (UseStackBanging) { + __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); + } + + // The instruction at the verified entry point must be 4 bytes or longer + // because it can be patched on the fly by make_non_entrant. 
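+  // LoongArch instructions are a fixed 4 bytes (BytesPerInstWord), so when
+  // nothing has been emitted after the verified entry point the single nop
+  // below provides the patch window required above.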
+ if (((intptr_t)__ pc() - start - vep_offset) < 1 * BytesPerInstWord) { + __ nop(); + } + + // Generate a new frame for the wrapper. + // do LA need this ? +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ enter(); + // -2 because return address is already present and so is saved fp + __ addi_d(SP, SP, -1 * (stack_size - 2*wordSize)); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + + // Calculate the difference between sp and fp. We need to know it + // after the native call because on windows Java Natives will pop + // the arguments and it is painful to do sp relative addressing + // in a platform independent way. So after the call we switch to + // fp relative addressing. + //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change + //the SP + int fp_adjustment = stack_size - 2*wordSize; + +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // Compute the fp offset for any slots used after the jni call + + int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; + // We use TREG as a thread pointer because it is callee save and + // if we load it once it is usable thru the entire wrapper + const Register thread = TREG; + + // We use S4 as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + const Register oop_handle_reg = S4; + if (is_critical_native) { + Unimplemented(); + // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, + // oop_handle_offset, oop_maps, in_regs, in_sig_bt); + } + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmpi, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + // + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* + // and, if static, the class mirror instead of a receiver. This pretty much + // guarantees that register layout will not match (and LA doesn't use reg + // parms though amd does). Since the native abi doesn't use register args + // and the java conventions does we don't have to worry about collisions. + // All of our moved are reg->stack or stack->stack. + // We ignore the extra arguments during the shuffle and handle them at the + // last moment. The shuffle is described by the two calling convention + // vectors we have in our possession. We simply walk the java vector to + // get the source locations and the c vector to get the destinations. + + int c_arg = method->is_static() ? 2 : 1 ; + + // Record sp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. 
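+  // Concretely: object_move() above records caller-frame oops at
+  // offset_in_older_frame + framesize_in_slots, i.e. at indices beyond this
+  // frame's own stack_slots, which is why the OopMap below is sized with
+  // stack_slots * 2.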
+ // + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + + // Mark location of fp (someday) + // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + // This may iterate in two different directions depending on the + // kind of native it is. The reason is that for regular JNI natives + // the incoming and outgoing registers are offset upwards and for + // critical natives they are offset down. + GrowableArray arg_order(2 * total_in_args); + VMRegPair tmp_vmreg; + tmp_vmreg.set2(T8->as_VMReg()); + + if (!is_critical_native) { + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); + } + } else { + // Compute a valid move order, using tmp_vmreg to break any cycles + Unimplemented(); + // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); + } + + int temploc = -1; + for (int ai = 0; ai < arg_order.length(); ai += 2) { + int i = arg_order.at(ai); + int c_arg = arg_order.at(ai + 1); + __ block_comment(err_msg("move %d -> %d", i, c_arg)); + if (c_arg == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // This arg needs to be moved to a temporary + __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); + in_regs[i] = tmp_vmreg; + temploc = i; + continue; + } else if (i == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // Read from the temporary location + assert(temploc != -1, "must be valid"); + i = temploc; + temploc = -1; + } +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); + } else if (in_regs[i].first()->is_FloatRegister()) { + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); + } + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif /* ASSERT */ + switch (in_sig_bt[i]) { + case T_ARRAY: + if (is_critical_native) { + Unimplemented(); + // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); + c_arg++; +#ifdef ASSERT + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif + break; + } + case T_OBJECT: + assert(!is_critical_native, "no oop arguments"); + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], + ((i == 0) && (!is_static)), + &receiver_offset); + break; + case T_VOID: + break; + + case T_FLOAT: + float_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_DOUBLE: + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + 
double_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_LONG : + long_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + simple_move32(masm, in_regs[i], out_regs[c_arg]); + } + } + + // point c_arg at the first arg that is already loaded in case we + // need to spill before we call out + c_arg = total_c_args - total_in_args; + // Pre-load a static method's oop. Used both by locking code and + // the normal JNI call code. + + __ move(oop_handle_reg, A1); + + if (method->is_static() && !is_critical_native) { + + // load opp into a register + int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( + (method->method_holder())->java_mirror())); + + + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ patchable_li52(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); + // Now handlize the static class mirror it's known not-null. + __ st_d( oop_handle_reg, SP, klass_offset); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(oop_handle_reg, Address(SP, klass_offset)); + // store the klass handle as second argument + __ move(A1, oop_handle_reg); + // and protect the arg if we must spill + c_arg--; + } + + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + // We use the same pc/oopMap repeatedly when we call out + + Label native_return; + __ set_last_Java_frame(SP, noreg, native_return); + + // We have all of the arguments setup at this point. We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? 
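+  // The DTrace method-entry probe below therefore brackets its call_VM_leaf
+  // with save_args()/restore_args() over out_regs so the already shuffled
+  // native arguments survive the runtime call.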
+ { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + save_args(masm, total_c_args, c_arg, out_regs); + int metadata_index = __ oop_recorder()->find_index(method()); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_li52(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + thread, AT); + + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // These are register definitions we need for locking/unlocking + const Register swap_reg = T8; // Must use T8 for cmpxchg instruction + const Register obj_reg = T4; // Will contain the oop + //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) + const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) + + + + Label slow_path_lock; + Label lock_done; + + // Lock a synchronized method + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ move(oop_handle_reg, A1); + + // Get address of the box + __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); + + // Load the oop from the handle + __ ld_d(obj_reg, oop_handle_reg, 0); + + if (UseBiasedLocking) { + // Note that oop_handle_reg is trashed during this call + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); + } + + // Load immediate 1 into swap_reg %T8 + __ li(swap_reg, 1); + + __ ld_d(AT, obj_reg, 0); + __ orr(swap_reg, swap_reg, AT); + + __ st_d(swap_reg, lock_reg, mark_word_offset); + __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg + + __ sub_d(swap_reg, swap_reg, SP); + __ li(AT, 3 - os::vm_page_size()); + __ andr(swap_reg , swap_reg, AT); + // Save the test result, for recursive case, the result is zero + __ st_d(swap_reg, lock_reg, mark_word_offset); + __ bne(swap_reg, R0, slow_path_lock); + // Slow path will re-enter here + __ bind(lock_done); + + if (UseBiasedLocking) { + // Re-fetch oop_handle_reg as we trashed it above + __ move(A1, oop_handle_reg); + } + } + + + // Finally just about ready to make the JNI call + + + // get JNIEnv* which is first argument to native + if (!is_critical_native) { + __ addi_d(A0, thread, in_bytes(JavaThread::jni_environment_offset())); + } + + // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) + // Load the second arguments into A1 + //__ ld(A1, SP , wordSize ); // klass + + // Now set thread in native + __ addi_d(AT, R0, _thread_in_native); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); + // do the call + __ call(native_func, relocInfo::runtime_call_type); + __ bind(native_return); + + oop_maps->add_gc_map(((intptr_t)__ pc()) - start, map); + + // WARNING - on Windows Java Natives use pascal calling convention and pop the + // arguments off of the stack. 
We could just re-adjust the stack pointer here + // and continue to do SP relative addressing but we instead switch to FP + // relative addressing. + + // Unpack native results. + switch (ret_type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : // nothing to do break; + case T_DOUBLE : + case T_FLOAT : + // Result is in st0 we'll save as needed + break; + case T_ARRAY: // Really a handle + case T_OBJECT: // Really a handle + break; // can't de-handlize until after safepoint check + case T_VOID: break; + case T_LONG: break; + default : ShouldNotReachHere(); + } + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. + __ addi_d(AT, R0, _thread_in_native_trans); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ membar(__ AnyAny); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. + __ serialize_memory(thread, T5); + } + } + + Label after_transition; + + // check for safepoint operation in progress and/or pending suspend requests + { + Label Continue; + __ li(AT, SafepointSynchronize::address_of_state()); + __ ld_w(T5, AT, 0); + __ addi_d(AT, T5, -SafepointSynchronize::_not_synchronized); + Label L; + __ bne(AT, R0, L); + __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // + save_native_result(masm, ret_type, stack_slots); + __ move(A0, thread); + __ addi_d(SP, SP, -wordSize); + __ push(S2); + __ li(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + if (!is_critical_native) { + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); + } else { + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); + } + __ move(SP, S2); // use S2 as a sender SP holder + __ pop(S2); + __ addi_d(SP, SP, wordSize); + //add for compressedoops + __ reinit_heapbase(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + + if (is_critical_native) { + // The call above performed the transition to thread_in_Java so + // skip the transition logic below. 
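+      // (beq R0, R0, after_transition always takes the branch: R0 is the
+      // hardwired zero register, so this is an unconditional jump.)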
+ __ beq(R0, R0, after_transition); + } + + __ bind(Continue); + } + + // change thread state + __ addi_d(AT, R0, _thread_in_Java); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(AT, thread, in_bytes(JavaThread::thread_state_offset())); + __ bind(after_transition); + Label reguard; + Label reguard_done; + __ ld_w(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ addi_d(AT, AT, -JavaThread::stack_guard_yellow_disabled); + __ beq(AT, R0, reguard); + // slow path reguard re-enters here + __ bind(reguard_done); + + // Handle possible exception (will unlock if necessary) + + // native result if any is live + + // Unlock + Label slow_path_unlock; + Label unlock_done; + if (method->is_synchronized()) { + + Label done; + + // Get locked oop from the handle we passed to jni + __ ld_d( obj_reg, oop_handle_reg, 0); + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, T8, done); + + } + + // Simple recursive lock? + + __ ld_d(AT, FP, lock_slot_fp_offset); + __ beq(AT, R0, done); + // Must save FSF if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + // get old displaced header + __ ld_d (T8, FP, lock_slot_fp_offset); + // get address of the stack lock + __ addi_d (c_rarg0, FP, lock_slot_fp_offset); + // Atomic swap old header if oop still contains the stack lock + __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); + + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + + } + { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + // Tell dtrace about this method exit + save_native_result(masm, ret_type, stack_slots); + int metadata_index = __ oop_recorder()->find_index( (method())); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_li52(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + thread, AT); + restore_native_result(masm, ret_type, stack_slots); + } + + // We can finally stop using that last_Java_frame we setup ages ago + + __ reset_last_Java_frame(false); + + // Unpack oop result, e.g. JNIHandles::resolve value. + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + __ resolve_jobject(V0, thread, T4); + } + + if (!is_critical_native) { + // reset handle block + __ ld_d(AT, thread, in_bytes(JavaThread::active_handles_offset())); + __ st_w(R0, AT, JNIHandleBlock::top_offset_in_bytes()); + } + + if (!is_critical_native) { + // Any exception pending? + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, exception_pending); + } + // no exception, we're almost done + + // check that only result value is on FPU stack + __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); + + // Return +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ leave(); + + __ jr(RA); + // Unexpected paths are out of line and go here + // Slow path locking & unlocking + if (method->is_synchronized()) { + + // BEGIN Slow path lock + __ bind(slow_path_lock); + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + __ move(A0, obj_reg); + __ move(A1, lock_reg); + __ move(A2, thread); + __ addi_d(SP, SP, - 3*wordSize); + + __ li(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); + __ move(SP, S2); + __ addi_d(SP, SP, 3*wordSize); + + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ b(lock_done); + // END Slow path lock + + // BEGIN Slow path unlock + __ bind(slow_path_unlock); + + // Slow path unlock + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ push(AT); + __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); + + __ li(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + // should be a peal + // +wordSize because of the push above + __ addi_d(A1, FP, lock_slot_fp_offset); + + __ move(A0, obj_reg); + __ addi_d(SP,SP, -2*wordSize); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), + relocInfo::runtime_call_type); + __ addi_d(SP, SP, 2*wordSize); + __ move(SP, S2); + //add for compressedoops + __ reinit_heapbase(); +#ifdef ASSERT + { + Label L; + __ ld_d( AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif /* ASSERT */ + + __ pop(AT); + __ st_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); + } + __ b(unlock_done); + // END Slow path unlock + + } + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), + relocInfo::runtime_call_type); + //add for compressedoops + __ reinit_heapbase(); + restore_native_result(masm, ret_type, stack_slots); + __ b(reguard_done); + + // BEGIN EXCEPTION PROCESSING + if (!is_critical_native) { + // Forward the exception + __ bind(exception_pending); + + // remove possible return value from FPU register stack + __ empty_FPU_stack(); + + // pop our frame + //forward_exception_entry need return address on stack + __ move(SP, FP); + __ pop(FP); + + // and forward the exception + __ jmp(StubRoutines::forward_exception_entry(), 
relocInfo::runtime_call_type); + } + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + + if (is_critical_native) { + nm->set_lazy_critical_native(true); + } + return nm; +} + +#ifdef HAVE_DTRACE_H +// --------------------------------------------------------------------------- +// Generate a dtrace nmethod for a given signature. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// abi and then leaves nops at the position you would expect to call a native +// function. When the probe is enabled the nops are replaced with a trap +// instruction that dtrace inserts and the trace will cause a notification +// to dtrace. +// +// The probes are only able to take primitive types and java/lang/String as +// arguments. No other java types are allowed. Strings are converted to utf8 +// strings so that from dtrace point of view java strings are converted to C +// strings. There is an arbitrary fixed limit on the total space that a method +// can use for converting the strings. (256 chars per string in the signature). +// So any java string larger then this is truncated. + +static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; +static bool offsets_initialized = false; + +static VMRegPair reg64_to_VMRegPair(Register r) { + VMRegPair ret; + if (wordSize == 8) { + ret.set2(r->as_VMReg()); + } else { + ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); + } + return ret; +} + + +nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, + methodHandle method) { + + + // generate_dtrace_nmethod is guarded by a mutex so we are sure to + // be single threaded in this method. + assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); + + // Fill in the signature array, for the calling-convention call. + int total_args_passed = method->size_of_parameters(); + + BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); + VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); + + // The signature we are going to use for the trap that dtrace will see + // java/lang/String is converted. We drop "this" and any other object + // is converted to NULL. (A one-slot java/lang/Long object reference + // is converted to a two-slot long, which is why we double the allocation). 
+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); + + int i=0; + int total_strings = 0; + int first_arg_to_pass = 0; + int total_c_args = 0; + + // Skip the receiver as dtrace doesn't want to see it + if( !method->is_static() ) { + in_sig_bt[i++] = T_OBJECT; + first_arg_to_pass = 1; + } + + SignatureStream ss(method->signature()); + for ( ; !ss.at_return_type(); ss.next()) { + BasicType bt = ss.type(); + in_sig_bt[i++] = bt; // Collect remaining bits of signature + out_sig_bt[total_c_args++] = bt; + if( bt == T_OBJECT) { + symbolOop s = ss.as_symbol_or_null(); + if (s == vmSymbols::java_lang_String()) { + total_strings++; + out_sig_bt[total_c_args-1] = T_ADDRESS; + } else if (s == vmSymbols::java_lang_Boolean() || + s == vmSymbols::java_lang_Byte()) { + out_sig_bt[total_c_args-1] = T_BYTE; + } else if (s == vmSymbols::java_lang_Character() || + s == vmSymbols::java_lang_Short()) { + out_sig_bt[total_c_args-1] = T_SHORT; + } else if (s == vmSymbols::java_lang_Integer() || + s == vmSymbols::java_lang_Float()) { + out_sig_bt[total_c_args-1] = T_INT; + } else if (s == vmSymbols::java_lang_Long() || + s == vmSymbols::java_lang_Double()) { + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } + } else if ( bt == T_LONG || bt == T_DOUBLE ) { + in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots + // We convert double to long + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } else if ( bt == T_FLOAT) { + // We convert float to int + out_sig_bt[total_c_args-1] = T_INT; + } + } + + assert(i==total_args_passed, "validly parsed signature"); + + // Now get the compiled-Java layout as input arguments + int comp_args_on_stack; + comp_args_on_stack = SharedRuntime::java_calling_convention( + in_sig_bt, in_regs, total_args_passed, false); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the a native (non-jni) function would expect them. To figure out + // where they go we convert the java signature to a C signature and remove + // T_VOID for any long/double we might have received. + + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Plus a temp for possible converion of float/double/long register args + + int conversion_temp = stack_slots; + stack_slots += 2; + + + // Now space for the string(s) we must convert + + int string_locs = stack_slots; + stack_slots += total_strings * + (max_dtrace_string_size / VMRegImpl::stack_slot_size); + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | string[n] | + // |---------------------| <- string_locs[n] + // | string[n-1] | + // |---------------------| <- string_locs[n-1] + // | ... | + // | ... 
| + // |---------------------| <- string_locs[1] + // | string[0] | + // |---------------------| <- string_locs[0] + // | temp | + // |---------------------| <- conversion_temp + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + intptr_t start = (intptr_t)__ pc(); + + // First thing make an ic check to see if we should even be here + + { + Label L; + const Register temp_reg = G3_scratch; + Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); + __ verify_oop(O0); + __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); + __ cmp(temp_reg, G5_inline_cache_reg); + __ brx(Assembler::equal, true, Assembler::pt, L); + + __ jump_to(ic_miss, 0); + __ align(CodeEntryAlignment); + __ bind(L); + } + + int vep_offset = ((intptr_t)__ pc()) - start; + + // The instruction at the verified entry point must be 4 bytes or longer + // because it can be patched on the fly by make_non_entrant. The stack bang + // instruction fits that requirement. + + // Generate stack overflow check before creating frame + __ generate_stack_overflow_check(stack_size); + + assert(((intptr_t)__ pc() - start - vep_offset) >= 1 * BytesPerInstWord, + "valid size for make_non_entrant"); + + // Generate a new frame for the wrapper. + __ save(SP, -stack_size, SP); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + VMRegPair zero; + const Register g0 = G0; // without this we get a compiler warning (why??) 
+ zero.set2(g0->as_VMReg()); + + int c_arg, j_arg; + + Register conversion_off = noreg; + + for (j_arg = first_arg_to_pass, c_arg = 0 ; + j_arg < total_args_passed ; j_arg++, c_arg++ ) { + + VMRegPair src = in_regs[j_arg]; + VMRegPair dst = out_regs[c_arg]; + +#ifdef ASSERT + if (src.first()->is_Register()) { + assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); + } else if (src.first()->is_FloatRegister()) { + assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)], "ack!"); + } + if (dst.first()->is_Register()) { + reg_destroyed[dst.first()->as_Register()->encoding()] = true; + } else if (dst.first()->is_FloatRegister()) { + freg_destroyed[dst.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)] = true; + } +#endif /* ASSERT */ + + switch (in_sig_bt[j_arg]) { + case T_ARRAY: + case T_OBJECT: + { + if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || + out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { + // need to unbox a one-slot value + Register in_reg = L0; + Register tmp = L2; + if ( src.first()->is_reg() ) { + in_reg = src.first()->as_Register(); + } else { + assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); + } + // If the final destination is an acceptable register + if ( dst.first()->is_reg() ) { + if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { + tmp = dst.first()->as_Register(); + } + } + + Label skipUnbox; + if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { + __ mov(G0, tmp->successor()); + } + __ mov(G0, tmp); + __ br_null(in_reg, true, Assembler::pn, skipUnbox); + + BasicType bt = out_sig_bt[c_arg]; + int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); + switch (bt) { + case T_BYTE: + __ ldub(in_reg, box_offset, tmp); break; + case T_SHORT: + __ lduh(in_reg, box_offset, tmp); break; + case T_INT: + __ ld(in_reg, box_offset, tmp); break; + case T_LONG: + __ ld_long(in_reg, box_offset, tmp); break; + default: ShouldNotReachHere(); + } + + __ bind(skipUnbox); + // If tmp wasn't final destination copy to final destination + if (tmp == L2) { + VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); + if (out_sig_bt[c_arg] == T_LONG) { + long_move(masm, tmp_as_VM, dst); + } else { + move32_64(masm, tmp_as_VM, out_regs[c_arg]); + } + } + if (out_sig_bt[c_arg] == T_LONG) { + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); + ++c_arg; // move over the T_VOID to keep the loop indices in sync + } + } else if (out_sig_bt[c_arg] == T_ADDRESS) { + Register s = + src.first()->is_reg() ? src.first()->as_Register() : L2; + Register d = + dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // We store the oop now so that the conversion pass can reach + // while in the inner frame. This will be the only store if + // the oop is NULL. 
+ if (s != L2) { + // src is register + if (d != L2) { + // dst is register + __ mov(s, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } else { + // src not a register + assert(Assembler::is_simm13(reg2offset(src.first()) + + STACK_BIAS), "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); + if (d == L2) { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } else if (out_sig_bt[c_arg] != T_VOID) { + // Convert the arg to NULL + if (dst.first()->is_reg()) { + __ mov(G0, dst.first()->as_Register()); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } + break; + case T_VOID: + break; + + case T_FLOAT: + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + move32_64(masm, src, dst); + } else { + if (dst.first()->is_reg()) { + // freg -> reg + int off = + STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + Register d = dst.first()->as_Register(); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + __ ld(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld(SP, conversion_off , d); + } + } else { + // freg -> mem + int off = STACK_BIAS + reg2offset(dst.first()); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + } + } + } + break; + + case T_DOUBLE: + assert( j_arg + 1 < total_args_passed && + in_sig_bt[j_arg + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + long_move(masm, src, dst); + } else { + Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // Destination could be an odd reg on 32bit in which case + // we can't load direct to the destination. 
+ + if (!d->is_even() && wordSize == 4) { + d = L2; + } + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, off); + __ ld_long(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld_long(SP, conversion_off, d); + } + if (d == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } + break; + + case T_LONG : + // 32bit can't do a split move of something like g1 -> O0, O1 + // so use a memory temp + if (src.is_single_phys_reg() && wordSize == 4) { + Register tmp = L2; + if (dst.first()->is_reg() && + (wordSize == 8 || dst.first()->as_Register()->is_even())) { + tmp = dst.first()->as_Register(); + } + + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stx(src.first()->as_Register(), SP, off); + __ ld_long(SP, off, tmp); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stx(src.first()->as_Register(), SP, conversion_off); + __ ld_long(SP, conversion_off, tmp); + } + + if (tmp == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } else { + long_move(masm, src, dst); + } + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + move32_64(masm, src, dst); + } + } + + + // If we have any strings we must store any register based arg to the stack + // This includes any still live xmm registers too. + + if (total_strings > 0 ) { + + // protect all the arg registers + __ save_frame(0); + __ mov(G2_thread, L7_thread_cache); + const Register L2_string_off = L2; + + // Get first string offset + __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); + + for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { + if (out_sig_bt[c_arg] == T_ADDRESS) { + + VMRegPair dst = out_regs[c_arg]; + const Register d = dst.first()->is_reg() ? + dst.first()->as_Register()->after_save() : noreg; + + // It's a string the oop and it was already copied to the out arg + // position + if (d != noreg) { + __ mov(d, O0); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); + } + Label skip; + + __ add_d(FP, L2_string_off, O1); + __ br_null(O0, false, Assembler::pn, skip); + + if (d != noreg) { + __ mov(O1, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); + } + + __ addi_d(L2_string_off, max_dtrace_string_size, L2_string_off); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), + relocInfo::runtime_call_type); + + __ bind(skip); + + } + + } + __ mov(L7_thread_cache, G2_thread); + __ restore(); + + } + + + // Ok now we are done. 
Need to place the nop that dtrace wants in order to + // patch in the trap + + int patch_offset = ((intptr_t)__ pc()) - start; + + __ nop(); + + + // Return + + __ restore(); + __ ret(); + + __ flush(); + nmethod *nm = nmethod::new_dtrace_nmethod( + method, masm->code(), vep_offset, patch_offset, frame_complete, + stack_slots / VMRegImpl::slots_per_word); + return nm; +} + +#endif // HAVE_DTRACE_H + +// this function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + return (callee_locals - callee_parameters) * Interpreter::stackElementWords; +} + +// "Top of Stack" slots that may be unused by the calling convention but must +// otherwise be preserved. +// On Intel these are not necessary and the value can be zero. +// On Sparc this describes the words reserved for storing a register window +// when an interrupt occurs. +uint SharedRuntime::out_preserve_stack_slots() { + return 0; +} + +//------------------------------generate_deopt_blob---------------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. +void SharedRuntime::generate_deopt_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + //CodeBuffer buffer ("deopt_blob", 4000, 2048); + CodeBuffer buffer ("deopt_blob", 8000, 2048); // FIXME for debug + MacroAssembler* masm = new MacroAssembler( & buffer); + int frame_size_in_words; + OopMap* map = NULL; + // Account for the extra args we place on the stack + // by the time we call fetch_unroll_info + const int additional_words = 2; // deopt kind, thread + + OopMapSet *oop_maps = new OopMapSet(); + RegisterSaver reg_save(false); + + address start = __ pc(); + Label cont; + // we use S3 for DeOpt reason register + Register reason = S3; + // use S6 for thread register + Register thread = TREG; + // use S7 for fetch_unroll_info returned UnrollBlock + Register unroll = S7; + // Prolog for non exception case! + + // We have been called from the deopt handler of the deoptee. + // + // deoptee: + // ... + // call X + // ... + // deopt_handler: call_deopt_stub + // cur. return pc --> ... + // + // So currently RA points behind the call in the deopt handler. + // We adjust it such that it points to the start of the deopt handler. + // The return_pc has been stored in the frame of the deoptee and + // will replace the address of the deopt_handler in the call + // to Deoptimization::fetch_unroll_info below. + + // HandlerImpl::size_deopt_handler() + __ addi_d(RA, RA, - NativeFarCall::instruction_size); + // Save everything in sight. + map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + // Normal deoptimization + __ li(reason, Deoptimization::Unpack_deopt); + __ b(cont); + + int reexecute_offset = __ pc() - start; + + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + __ li(reason, Deoptimization::Unpack_reexecute); + __ b(cont); + + int exception_offset = __ pc() - start; + // Prolog for exception case + + // all registers are dead at this entry point, except for V0 and + // V1 which contain the exception oop and exception pc + // respectively. 
Set them in TLS and fall thru to the + // unpack_with_exception_in_tls entry point. + + __ get_thread(thread); + __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + int exception_in_tls_offset = __ pc() - start; + // new implementation because exception oop is now passed in JavaThread + + // Prolog for exception case + // All registers must be preserved because they might be used by LinearScan + // Exceptiop oop and throwing PC are passed in JavaThread + // tos: stack at point of call to method that threw the exception (i.e. only + // args are on the stack, no return address) + + // Return address will be patched later with the throwing pc. The correct value is not + // available now because loading it from memory would destroy registers. + // Save everything in sight. + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + + // Now it is safe to overwrite any register + // store the correct deoptimization type + __ li(reason, Deoptimization::Unpack_exception); + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread + __ get_thread(thread); + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V1, SP, reg_save.ra_offset()); //save ra + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + + +#ifdef ASSERT + // verify that there is really an exception oop in JavaThread + __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); + __ verify_oop(AT); + // verify that there is no pending exception + Label no_pending_exception; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, no_pending_exception); + __ stop("must not have pending exception here"); + __ bind(no_pending_exception); +#endif + __ bind(cont); + // Compiled code leaves the floating point stack dirty, empty it. + __ empty_FPU_stack(); + + + // Call C code. Need thread and this frame, but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ move(A0, thread); + __ addi_d(SP, SP, -additional_words * wordSize); + + Label retaddr; + __ set_last_Java_frame(NOREG, NOREG, retaddr); + + // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. Call should capture return values. + + // TODO: confirm reloc + __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); + __ bind(retaddr); + oop_maps->add_gc_map(__ pc() - start, map); + __ addi_d(SP, SP, additional_words * wordSize); + __ get_thread(thread); + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + __ move(unroll, V0); + + + // Move the unpack kind to a safe place in the UnrollBlock because + // we are very short of registers + + Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); + __ st_w(reason, unpack_kind); + // save the unpack_kind value + // Retrieve the possible live values (return values) + // All callee save registers representing jvm state + // are now in the vframeArray. + + Label noException; + __ li(AT, Deoptimization::Unpack_exception); + __ bne(AT, reason, noException);// Was exception pending? 
+ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); + + __ verify_oop(V0); + + // Overwrite the result registers with the exception results. + __ st_ptr(V0, SP, reg_save.v0_offset()); + __ st_ptr(V1, SP, reg_save.v1_offset()); + + __ bind(noException); + + + // Stack is back to only having register save data on the stack. + // Now restore the result registers. Everything else is either dead or captured + // in the vframeArray. + + reg_save.restore_result_registers(masm); + // All of the register save area has been popped of the stack. Only the + // return address remains. + // Pop all the frames we must move/replace. + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). + // + // Note: by leaving the return address of self-frame on the stack + // and using the size of frame 2 to adjust the stack + // when we are done the return to frame 3 will still be on the stack. + + // register for the sender's sp + Register sender_sp = Rsender; + // register for frame pcs + Register pcs = T0; + // register for frame sizes + Register sizes = T1; + // register for frame count + Register count = T3; + + // Pop deoptimized frame + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ add_d(SP, SP, AT); + // sp should be pointing at the return address to the caller (3) + + // Load array of frame pcs into pcs + __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + __ addi_d(SP, SP, wordSize); // trash the old pc + // Load array of frame sizes into T6 + __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + + + + // Load count of frams into T3 + __ ld_w(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + // Pick up the initial fp we should save + __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + __ move(sender_sp, SP); + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ sub_d(SP, SP, AT); + + Label loop; + __ bind(loop); + __ ld_d(T2, sizes, 0); // Load frame size + __ ld_ptr(AT, pcs, 0); // save return address + __ addi_d(T2, T2, -2 * wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ sub_d(SP, SP, T2); // Prolog! 
+ // This value is corrected by layout_activation_impl + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ addi_d(count, count, -1); // decrement counter + __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ bne(count, R0, loop); + __ ld_d(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); + // Re-push self-frame + __ push2(AT, FP); + __ move(FP, SP); + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + __ addi_d(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); + + // Restore frame locals after moving the frame + __ st_d(V0, SP, reg_save.v0_offset()); + __ st_d(V1, SP, reg_save.v1_offset()); + __ fst_d(F0, SP, reg_save.fpr0_offset()); + __ fst_d(F1, SP, reg_save.fpr1_offset()); + + // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. + __ move(A1, reason); // exec_mode + __ get_thread(thread); + __ move(A0, thread); // thread + __ addi_d(SP, SP, (-additional_words) *wordSize); + + // set last_Java_sp, last_Java_fp + Label L; + address the_pc = __ pc(); + __ bind(L); + __ set_last_Java_frame(NOREG, FP, L); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); + // Revert SP alignment after call since we're going to do some SP relative addressing below + __ ld_d(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // Set an oopmap for the call site + oop_maps->add_gc_map(the_pc - start, new OopMap(frame_size_in_words, 0)); + + __ push(V0); + + __ get_thread(thread); + __ reset_last_Java_frame(true); + + // Collect return values + __ ld_d(V0, SP, reg_save.v0_offset() + (additional_words + 1) * wordSize); + __ ld_d(V1, SP, reg_save.v1_offset() + (additional_words + 1) * wordSize); + // Pop float stack and store in local + __ fld_d(F0, SP, reg_save.fpr0_offset() + (additional_words + 1) * wordSize); + __ fld_d(F1, SP, reg_save.fpr1_offset() + (additional_words + 1) * wordSize); + + //FIXME, + // Clear floating point stack before returning to interpreter + __ empty_FPU_stack(); + //FIXME, we should consider about float and double + // Push a float or double return value if necessary. + __ leave(); + + // Jump to interpreter + __ jr(RA); + + masm->flush(); + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +} + +#ifdef COMPILER2 + +//------------------------------generate_uncommon_trap_blob-------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. 
+void SharedRuntime::generate_uncommon_trap_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); + MacroAssembler* masm = new MacroAssembler(&buffer); + + enum frame_layout { + fp_off, fp_off2, + return_off, return_off2, + framesize + }; + assert(framesize % 4 == 0, "sp not 16-byte aligned"); + address start = __ pc(); + + // Push self-frame. + __ addi_d(SP, SP, -framesize * BytesPerInt); + + __ st_d(RA, SP, return_off * BytesPerInt); + __ st_d(FP, SP, fp_off * BytesPerInt); + + __ addi_d(FP, SP, fp_off * BytesPerInt); + + // Clear the floating point exception stack + __ empty_FPU_stack(); + + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // set last_Java_sp + Label retaddr; + __ set_last_Java_frame(NOREG, FP, retaddr); + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // capture callee-saved registers as well as return values. + __ move(A0, thread); + // argument already in T0 + __ move(A1, T0); + __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); + __ bind(retaddr); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap( framesize, 0 ); + + oop_maps->add_gc_map(__ pc() - start, map); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + Register unroll = S7; + __ move(unroll, V0); + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: possible-i2c-adapter-frame + // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an + // and c2i here) + + __ addi_d(SP, SP, framesize * BytesPerInt); + + // Pop deoptimized frame + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ add_d(SP, SP, AT); + + // register for frame pcs + Register pcs = T8; + // register for frame sizes + Register sizes = T4; + // register for frame count + Register count = T3; + // register for the sender's sp + Register sender_sp = T1; + + // sp should be pointing at the return address to the caller (4) + // Load array of frame pcs + __ ld_d(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + + // Load array of frame sizes + __ ld_d(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + __ ld_wu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + + // Pick up the initial fp we should save + __ ld_d(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + + __ move(sender_sp, SP); + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ sub_d(SP, SP, AT); + // Push interpreter frames in a loop + Label loop; + __ bind(loop); + __ ld_d(T2, sizes, 0); // Load frame size + __ ld_d(AT, pcs, 0); // save return address + __ addi_d(T2, T2, -2*wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ sub_d(SP, SP, T2); // Prolog! 
+ // This value is corrected by layout_activation_impl + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ st_d(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ addi_d(count, count, -1); // decrement counter + __ addi_d(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ addi_d(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ bne(count, R0, loop); + + __ ld_d(RA, pcs, 0); + + // Re-push self-frame + // save old & set new FP + // save final return address + __ enter(); + + // Use FP because the frames look interpreted now + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + Label L; + address the_pc = __ pc(); + __ bind(L); + __ set_last_Java_frame(NOREG, FP, L); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + __ move(A0, thread); + __ li(A1, Deoptimization::Unpack_uncommon_trap); + __ call((address)Deoptimization::unpack_frames, relocInfo::runtime_call_type); + // Set an oopmap for the call site + oop_maps->add_gc_map(the_pc - start, new OopMap(framesize, 0)); + + __ reset_last_Java_frame(true); + + // Pop self-frame. + __ leave(); // Epilog! + + // Jump to interpreter + __ jr(RA); + // ------------- + // make sure all code is generated + masm->flush(); + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); +} + +#endif // COMPILER2 + +//------------------------------generate_handler_blob------------------- +// +// Generate a special Compile2Runtime blob that saves all registers, and sets +// up an OopMap and calls safepoint code to stop the compiled code for +// a safepoint. +// +// This blob is jumped to (via a breakpoint and the signal handler) from a +// safepoint in compiled code. + +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { + + // Account for thread arg in our frame + const int additional_words = 0; + int frame_size_in_words; + + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // allocate space for the code + // setup code generation tools + CodeBuffer buffer ("handler_blob", 2048, 512); + MacroAssembler* masm = new MacroAssembler( &buffer); + + const Register thread = TREG; + address start = __ pc(); + bool cause_return = (poll_type == POLL_AT_RETURN); + RegisterSaver reg_save(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */); + + // If cause_return is true we are at a poll_return and there is + // the return address in RA to the caller on the nmethod + // that is safepoint. We can leave this return in RA and + // effectively complete the return and safepoint in the caller. + // Otherwise we load exception pc to RA. 
+ __ push(thread); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + if(!cause_return) { + __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset())); + } + + __ pop(thread); + map = reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work outselvs. + + __ move(A0, thread); + Label retaddr; + __ set_last_Java_frame(NOREG, NOREG, retaddr); + + // Do the call + // TODO: confirm reloc + __ call(call_ptr, relocInfo::runtime_call_type); + __ bind(retaddr); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + oop_maps->add_gc_map(__ pc() - start, map); + + Label noException; + + // Clear last_Java_sp again + __ reset_last_Java_frame(false); + + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, noException); + + // Exception pending + + reg_save.restore_live_registers(masm); + //forward_exception_entry need return address on the stack + __ push(RA); + // TODO: confirm reloc + __ jmp((address)StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + + // No exception case + __ bind(noException); + // Normal exit, register restoring and exit + reg_save.restore_live_registers(masm); + __ jr(RA); + + masm->flush(); + // Fill-out other meta info + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); +} + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss +// +// Generate a stub that calls into vm to find out the proper destination +// of a java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + + //CodeBuffer buffer(name, 1000, 512); + //FIXME. code_size + CodeBuffer buffer(name, 2000, 2048); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int frame_size_words; + RegisterSaver reg_save(false /* save_vectors */); + //we put the thread in A0 + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + address start = __ pc(); + map = reg_save.save_live_registers(masm, 0, &frame_size_words); + + + int frame_complete = __ offset(); + const Register thread = T8; + __ get_thread(thread); + + __ move(A0, thread); + Label retaddr; + __ set_last_Java_frame(noreg, FP, retaddr); + // align the stack before invoke native + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + // TODO: confirm reloc + __ call(destination, relocInfo::runtime_call_type); + __ bind(retaddr); + + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. 
+ oop_maps->add_gc_map(__ pc() - start, map); + // V0 contains the address we are going to jump to assuming no exception got installed + __ get_thread(thread); + __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // clear last_Java_sp + __ reset_last_Java_frame(true); + // check for pending exceptions + Label pending; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, pending); + // get the returned Method* + __ get_vm_result_2(Rmethod, thread); + __ st_ptr(Rmethod, SP, reg_save.s3_offset()); + __ st_ptr(V0, SP, reg_save.t5_offset()); + reg_save.restore_live_registers(masm); + + // We are back the the original state on entry and ready to go the callee method. + __ jr(T5); + // Pending exception after the safepoint + + __ bind(pending); + + reg_save.restore_live_registers(masm); + + // exception pending => remove activation and forward to exception handler + //forward_exception_entry need return address on the stack + __ push(RA); + __ get_thread(thread); + __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); + __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + // + // make sure all code is generated + masm->flush(); + RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); + return tmp; +} + +extern "C" int SpinPause() {return 0;} diff --git a/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp new file mode 100644 index 00000000000..361b775144d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/stubGenerator_loongarch_64.cpp @@ -0,0 +1,3445 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/top.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) + +//#ifdef PRODUCT +//#define BLOCK_COMMENT(str) /* nothing */ +//#else +//#define BLOCK_COMMENT(str) __ block_comment(str) +//#endif + +//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions + +// Stub Code definitions + +static address handle_unsafe_access() { + JavaThread* thread = JavaThread::current(); + address pc = thread->saved_exception_pc(); + // pc is the instruction which we must emulate + // doing a no-op is fine: return garbage from the load + // therefore, compute npc + address npc = (address)((unsigned long)pc + sizeof(unsigned int)); + + // request an async exception + thread->set_pending_unsafe_access_error(); + + // return address of next instruction to execute + return npc; +} + +class StubGenerator: public StubCodeGenerator { + private: + + // This fig is not LA ABI. It is call Java from C ABI. + // Call stubs are used to call Java from C + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S1) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp + // 3 [ result ] <--- a1 + // 4 [ result_type ] <--- a2 + // 5 [ method ] <--- a3 + // 6 [ entry_point ] <--- a4 + // 7 [ parameters ] <--- a5 + // 8 [ parameter_size ] <--- a6 + // 9 [ thread ] <--- a7 + + // + // LA ABI does not save paras in sp. + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + //-13 [ thread ] + //-12 [ result_type ] <--- a2 + //-11 [ result ] <--- a1 + //-10 [ ] + // -9 [ ptr. to call wrapper ] <--- a0 + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S1) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ] <--- old sp + // + // Find a right place in the call_stub for S8. + // S8 will point to the starting point of Interpreter::dispatch_table(itos). + // It should be saved/restored before/after Java calls. 
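As a reading aid for the enum that follows (with wordSize == 8, all offsets relative to FP):

    thread_off      == -13  ->  saved A7 (thread)       at FP - 104
    result_type_off == -12  ->  saved A2 (result_type)  at FP -  96
    result_off      == -11  ->  saved A1 (result)       at FP -  88
    S8_off          == -14  ->  saved S8                at FP - 112

total_off == thread_off - 1 == -14, so the prologue's addi_d(SP, SP, total_off * wordSize) drops SP by 14 words (112 bytes), matching the 14 negative slots in the table from BCP_off == -1 down to S8_off == -14.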
+ // + enum call_stub_layout { + RA_off = 1, + FP_off = 0, + BCP_off = -1, + LVP_off = -2, + TSR_off = -3, + S1_off = -4, + S3_off = -5, + S4_off = -6, + S5_off = -7, + S6_off = -8, + call_wrapper_off = -9, + result_off = -11, + result_type_off = -12, + thread_off = -13, + total_off = thread_off - 1, + S8_off = -14, + }; + + address generate_call_stub(address& return_address) { + assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + + // same as in generate_catch_exception()! + + // stub code + // save ra and fp + __ enter(); + // I think 14 is the max gap between argument and callee saved register + __ addi_d(SP, SP, total_off * wordSize); + __ st_d(BCP, FP, BCP_off * wordSize); + __ st_d(LVP, FP, LVP_off * wordSize); + __ st_d(TSR, FP, TSR_off * wordSize); + __ st_d(S1, FP, S1_off * wordSize); + __ st_d(S3, FP, S3_off * wordSize); + __ st_d(S4, FP, S4_off * wordSize); + __ st_d(S5, FP, S5_off * wordSize); + __ st_d(S6, FP, S6_off * wordSize); + __ st_d(A0, FP, call_wrapper_off * wordSize); + __ st_d(A1, FP, result_off * wordSize); + __ st_d(A2, FP, result_type_off * wordSize); + __ st_d(A7, FP, thread_off * wordSize); + __ st_d(S8, FP, S8_off * wordSize); + + __ li(S8, (long)Interpreter::dispatch_table(itos)); + +#ifdef OPT_THREAD + __ move(TREG, A7); +#endif + //add for compressedoops + __ reinit_heapbase(); + +#ifdef ASSERT + // make sure we have no pending exceptions + { + Label L; + __ ld_d(AT, A7, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + /* FIXME: I do not know how to realize stop in LA, do it in the future */ + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ bind(L); + } +#endif + + // pass parameters if any + // A5: parameter + // A6: parameter_size + // T0: parameter_size_tmp(--) + // T2: offset(++) + // T3: tmp + Label parameters_done; + // judge if the parameter_size equals 0 + __ beq(A6, R0, parameters_done); + __ slli_d(AT, A6, Interpreter::logStackElementSize); + __ sub_d(SP, SP, AT); + __ li(AT, -StackAlignmentInBytes); + __ andr(SP, SP, AT); + // Copy Java parameters in reverse order (receiver last) + // Note that the argument order is inverted in the process + Label loop; + __ move(T0, A6); + __ move(T2, R0); + __ bind(loop); + + // get parameter + __ alsl_d(T3, T0, A5, LogBytesPerWord - 1); + __ ld_d(AT, T3, -wordSize); + __ alsl_d(T3, T2, SP, LogBytesPerWord - 1); + __ st_d(AT, T3, Interpreter::expr_offset_in_bytes(0)); + __ addi_d(T2, T2, 1); + __ addi_d(T0, T0, -1); + __ bne(T0, R0, loop); + // advance to next parameter + + // call Java function + __ bind(parameters_done); + + // receiver in V0, methodOop in Rmethod + + __ move(Rmethod, A3); + __ move(Rsender, SP); //set sender sp + __ jalr(A4); + return_address = __ pc(); + + Label common_return; + __ bind(common_return); + + // store result depending on type + // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + __ ld_d(T0, FP, result_off * wordSize); // result --> T0 + Label is_long, is_float, is_double, exit; + __ ld_d(T2, FP, result_type_off * wordSize); // result_type --> T2 + __ addi_d(T3, T2, (-1) * T_LONG); + __ beq(T3, R0, is_long); + __ addi_d(T3, T2, (-1) * T_FLOAT); + __ beq(T3, R0, is_float); + __ addi_d(T3, T2, (-1) * T_DOUBLE); + __ beq(T3, R0, is_double); + + // handle T_INT case + __ st_d(V0, T0, 0 * wordSize); + __ bind(exit); + + // restore + __ ld_d(BCP, FP, BCP_off * wordSize); + __ ld_d(LVP, 
FP, LVP_off * wordSize); + __ ld_d(S8, FP, S8_off * wordSize); + __ ld_d(TSR, FP, TSR_off * wordSize); + + __ ld_d(S1, FP, S1_off * wordSize); + __ ld_d(S3, FP, S3_off * wordSize); + __ ld_d(S4, FP, S4_off * wordSize); + __ ld_d(S5, FP, S5_off * wordSize); + __ ld_d(S6, FP, S6_off * wordSize); + + __ leave(); + + // return + __ jr(RA); + + // handle return types different from T_INT + __ bind(is_long); + __ st_d(V0, T0, 0 * wordSize); + __ b(exit); + + __ bind(is_float); + __ fst_s(FV0, T0, 0 * wordSize); + __ b(exit); + + __ bind(is_double); + __ fst_d(FV0, T0, 0 * wordSize); + __ b(exit); + StubRoutines::la::set_call_stub_compiled_return(__ pc()); + __ b(common_return); + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + // Note: Usually the parameters are removed by the callee. In case + // of an exception crossing an activation frame boundary, that is + // not the case if the callee is compiled code => need to setup the + // sp. + // + // V0: exception oop + + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + + Register thread = TREG; + + // get thread directly +#ifndef OPT_THREAD + __ ld_d(thread, FP, thread_off * wordSize); +#endif + +#ifdef ASSERT + // verify that threads correspond + { Label L; + __ get_thread(T8); + __ beq(T8, thread, L); + __ stop("StubRoutines::catch_exception: threads must correspond"); + __ bind(L); + } +#endif + // set pending exception + __ verify_oop(V0); + __ st_d(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ li(AT, (long)__FILE__); + __ st_d(AT, thread, in_bytes(Thread::exception_file_offset ())); + __ li(AT, (long)__LINE__); + __ st_d(AT, thread, in_bytes(Thread::exception_line_offset ())); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); + __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + // Contract with Java-level exception handlers: + // V0: exception + // V1: throwing pc + // + // NOTE: At entry of this stub, exception-pc must be on stack !! + + address generate_forward_exception() { + StubCodeMark mark(this, "StubRoutines", "forward exception"); + //Register thread = TREG; + Register thread = TREG; + address start = __ pc(); + + // Upon entry, the sp points to the return address returning into + // Java (interpreted or compiled) code; i.e., the return address + // throwing pc. + // + // Arguments pushed before the runtime call are still on the stack + // but the exception handler will reset the stack pointer -> + // ignore them. A potential result in registers can be ignored as + // well. 
+ +#ifndef OPT_THREAD + __ get_thread(thread); +#endif +#ifdef ASSERT + // make sure this code is only executed if there is a pending exception + { + Label L; + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into T4 + __ ld_d(A1, SP, 0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T4, V0); + __ pop(V1); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld_d(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ st_d(R0, thread, in_bytes(Thread::pending_exception_offset())); + +#ifdef ASSERT + // make sure exception is set + { + Label L; + __ bne(V0, R0, L); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + + // continue at exception handler (return address removed) + // V0: exception + // T4: exception handler + // V1: throwing pc + __ verify_oop(V0); + __ jr(T4); + return start; + } + + // The following routine generates a subroutine to throw an + // asynchronous UnknownError when an unsafe access gets a fault that + // could not be reasonably prevented by the programmer. (Example: + // SIGBUS/OBJERR.) + address generate_handler_for_unsafe_access() { + StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); + address start = __ pc(); + __ push(V0); + __ pushad_except_v0(); // push registers + __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); + __ popad_except_v0(); + __ move(RA, V0); + __ pop(V0); + __ jr(RA); + return start; + } + + // Non-destructive plausibility checks for oops + // + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); + __ reinit_heapbase(); + __ verify_oop_subroutine(); + address end = __ pc(); + return start; + } + + // + // Generate stub for array fill. If "aligned" is true, the + // "to" address is assumed to be heapword aligned. 
+ // + // Arguments for generated stub: + // to: A0 + // value: A1 + // count: A2 treated as signed + // + address generate_fill(BasicType t, bool aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + const Register to = A0; // source array address + const Register value = A1; // value + const Register count = A2; // elements count + + const Register end = T5; // source array address end + const Register tmp = T8; // temp register + + Label L_fill_elements; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 0; + __ slti(AT, count, 9); // Short arrays (<= 8 bytes) fill by element + __ bstrins_d(value, value, 15, 8); // 8 bit -> 16 bit + __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit + __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit + __ bnez(AT, L_fill_elements); + break; + case T_SHORT: + shift = 1; + __ slti(AT, count, 5); // Short arrays (<= 8 bytes) fill by element + __ bstrins_d(value, value, 31, 16); // 16 bit -> 32 bit + __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit + __ bnez(AT, L_fill_elements); + break; + case T_INT: + shift = 2; + __ slti(AT, count, 3); // Short arrays (<= 8 bytes) fill by element + __ bstrins_d(value, value, 63, 32); // 32 bit -> 64 bit + __ bnez(AT, L_fill_elements); + break; + default: ShouldNotReachHere(); + } + + switch (t) { + case T_BYTE: + __ add_d(end, to, count); + break; + case T_SHORT: + case T_INT: + __ alsl_d(end, count, to, shift-1); + break; + default: ShouldNotReachHere(); + } + if (!aligned) { + __ st_d(value, to, 0); + __ bstrins_d(to, R0, 2, 0); + __ addi_d(to, to, 8); + } + __ st_d(value, end, -8); + __ bstrins_d(end, R0, 2, 0); + + // + // Fill large chunks + // + Label L_loop_begin, L_not_64bytes_fill, L_loop_end; + __ addi_d(AT, to, 64); + __ blt(end, AT, L_not_64bytes_fill); + __ addi_d(to, to, 64); + __ bind(L_loop_begin); + __ st_d(value, to, -8); + __ st_d(value, to, -16); + __ st_d(value, to, -24); + __ st_d(value, to, -32); + __ st_d(value, to, -40); + __ st_d(value, to, -48); + __ st_d(value, to, -56); + __ st_d(value, to, -64); + __ addi_d(to, to, 64); + __ bge(end, to, L_loop_begin); + __ addi_d(to, to, -64); + __ beq(to, end, L_loop_end); + + __ bind(L_not_64bytes_fill); + // There are 0 - 7 words + __ pcaddi(AT, 4); + __ sub_d(tmp, end, to); + __ alsl_d(AT, tmp, AT, 1); + __ jr(AT); + + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ st_d(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 4: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + __ st_d(value, to, 32); + __ jr(RA); + __ nop(); + __ nop(); + + // 6: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + __ st_d(value, to, 32); + __ st_d(value, to, 40); + __ jr(RA); + __ nop(); + + // 7: + __ st_d(value, to, 0); + __ st_d(value, to, 8); + __ st_d(value, to, 16); + __ st_d(value, to, 24); + 
__ st_d(value, to, 32); + __ st_d(value, to, 40); + __ st_d(value, to, 48); + + __ bind(L_loop_end); + __ jr(RA); + + // Short arrays (<= 8 bytes) + __ bind(L_fill_elements); + __ pcaddi(AT, 4); + __ slli_d(tmp, count, 4 + shift); + __ add_d(AT, AT, tmp); + __ jr(AT); + + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ st_b(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + + // 2: + __ st_h(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + + // 3: + __ st_h(value, to, 0); + __ st_b(value, to, 2); + __ jr(RA); + __ nop(); + + // 4: + __ st_w(value, to, 0); + __ jr(RA); + __ nop(); + __ nop(); + + // 5: + __ st_w(value, to, 0); + __ st_b(value, to, 4); + __ jr(RA); + __ nop(); + + // 6: + __ st_w(value, to, 0); + __ st_h(value, to, 4); + __ jr(RA); + __ nop(); + + // 7: + __ st_w(value, to, 0); + __ st_w(value, to, 3); + __ jr(RA); + __ nop(); + + // 8: + __ st_d(value, to, 0); + __ jr(RA); + return start; + } + + // + // Generate overlap test for array copy stubs + // + // Input: + // A0 - source array address + // A1 - destination array address + // A2 - element count + // + // Temp: + // AT - destination array address - source array address + // T4 - element count * element size + // + void array_overlap_test(address no_overlap_target, int log2_elem_size) { + __ slli_d(T4, A2, log2_elem_size); + __ sub_d(AT, A1, A0); + __ bgeu(AT, T4, no_overlap_target); + } + + // Generate code for an array write pre barrier + // + // Input: + // addr - starting address + // count - element count + // + // Temp: + // AT - used to swap addr and count + // + void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { + BarrierSet* bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + // With G1, don't generate the call if we statically know that the target in uninitialized + if (!dest_uninitialized) { + if (count == A0) { + if (addr == A1) { + // exactly backwards!! + __ move(AT, A0); + __ move(A0, A1); + __ move(A1, AT); + } else { + __ move(A1, count); + __ move(A0, addr); + } + } else { + __ move(A0, addr); + __ move(A1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + case BarrierSet::ModRef: + break; + default: + ShouldNotReachHere(); + } + } + + // + // Generate code for an array write post barrier + // + // Input: + // start - register containing starting address of destination array + // count - elements count + // scratch - scratch register + // + // Temp: + // AT - used to swap addr and count + // + // The input registers are overwritten. + // + void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) { + assert_different_registers(start, count, scratch, AT); + BarrierSet* bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + if (count == A0) { + if (start == A1) { + // exactly backwards!! 
+ __ move(AT, A0); + __ move(A0, A1); + __ move(A1, AT); + } else { + __ move(A1, count); + __ move(A0, start); + } + } else { + __ move(A0, start); + __ move(A1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label L_loop; + const Register end = count; + + if (UseConcMarkSweepGC) { + __ membar(__ StoreStore); + } + + int64_t disp = (int64_t) ct->byte_map_base; + __ li(scratch, disp); + + __ lea(end, Address(start, count, TIMES_OOP, 0)); // end == start + count * oop_size + __ addi_d(end, end, -BytesPerHeapOop); // end - 1 to make inclusive + __ shr(start, CardTableModRefBS::card_shift); + __ shr(end, CardTableModRefBS::card_shift); + __ sub_d(end, end, start); // end --> cards count + + __ add_d(start, start, scratch); + + __ bind(L_loop); + __ stx_b(R0, start, count); + __ addi_d(count, count, -1); + __ bge(count, R0, L_loop); + } + break; + default: + ShouldNotReachHere(); + } + } + + // disjoint large copy + void generate_disjoint_large_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label loop, le32, le16, le8, lt8; + + __ bind(entry); + __ add_d(A3, A1, A2); + __ add_d(A2, A0, A2); + __ ld_d(A6, A0, 0); + __ ld_d(A7, A2, -8); + + __ andi(T1, A0, 7); + __ sub_d(T0, R0, T1); + __ addi_d(T0, T0, 8); + + __ add_d(A0, A0, T0); + __ add_d(A5, A1, T0); + + __ addi_d(A4, A2, -64); + __ bgeu(A0, A4, le32); + + __ bind(loop); + __ ld_d(T0, A0, 0); + __ ld_d(T1, A0, 8); + __ ld_d(T2, A0, 16); + __ ld_d(T3, A0, 24); + __ ld_d(T4, A0, 32); + __ ld_d(T5, A0, 40); + __ ld_d(T6, A0, 48); + __ ld_d(T7, A0, 56); + __ addi_d(A0, A0, 64); + __ st_d(T0, A5, 0); + __ st_d(T1, A5, 8); + __ st_d(T2, A5, 16); + __ st_d(T3, A5, 24); + __ st_d(T4, A5, 32); + __ st_d(T5, A5, 40); + __ st_d(T6, A5, 48); + __ st_d(T7, A5, 56); + __ addi_d(A5, A5, 64); + __ bltu(A0, A4, loop); + + __ bind(le32); + __ addi_d(A4, A2, -32); + __ bgeu(A0, A4, le16); + __ ld_d(T0, A0, 0); + __ ld_d(T1, A0, 8); + __ ld_d(T2, A0, 16); + __ ld_d(T3, A0, 24); + __ addi_d(A0, A0, 32); + __ st_d(T0, A5, 0); + __ st_d(T1, A5, 8); + __ st_d(T2, A5, 16); + __ st_d(T3, A5, 24); + __ addi_d(A5, A5, 32); + + __ bind(le16); + __ addi_d(A4, A2, -16); + __ bgeu(A0, A4, le8); + __ ld_d(T0, A0, 0); + __ ld_d(T1, A0, 8); + __ addi_d(A0, A0, 16); + __ st_d(T0, A5, 0); + __ st_d(T1, A5, 8); + __ addi_d(A5, A5, 16); + + __ bind(le8); + __ addi_d(A4, A2, -8); + __ bgeu(A0, A4, lt8); + __ ld_d(T0, A0, 0); + __ st_d(T0, A5, 0); + + __ bind(lt8); + __ st_d(A6, A1, 0); + __ st_d(A7, A3, -8); + __ jr(RA); + } + + // conjoint large copy + void generate_conjoint_large_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label loop, le32, le16, le8, lt8; + + __ bind(entry); + __ add_d(A3, A1, A2); + __ add_d(A2, A0, A2); + __ ld_d(A6, A0, 0); + __ ld_d(A7, A2, -8); + + __ andi(T1, A0, 7); + __ sub_d(A2, A2, T1); + __ sub_d(A5, A3, T1); + + __ addi_d(A4, A0, 64); + __ bgeu(A4, A2, le32); + + __ bind(loop); + __ ld_d(T0, A2, -8); + __ ld_d(T1, A2, -16); + __ ld_d(T2, A2, -24); + __ ld_d(T3, A2, -32); + __ ld_d(T4, A2, -40); + __ ld_d(T5, A2, -48); + __ ld_d(T6, A2, -56); + __ ld_d(T7, A2, -64); + __ addi_d(A2, A2, -64); + __ st_d(T0, A5, -8); + __ 
st_d(T1, A5, -16); + __ st_d(T2, A5, -24); + __ st_d(T3, A5, -32); + __ st_d(T4, A5, -40); + __ st_d(T5, A5, -48); + __ st_d(T6, A5, -56); + __ st_d(T7, A5, -64); + __ addi_d(A5, A5, -64); + __ bltu(A4, A2, loop); + + __ bind(le32); + __ addi_d(A4, A0, 32); + __ bgeu(A4, A2, le16); + __ ld_d(T0, A2, -8); + __ ld_d(T1, A2, -16); + __ ld_d(T2, A2, -24); + __ ld_d(T3, A2, -32); + __ addi_d(A2, A2, -32); + __ st_d(T0, A5, -8); + __ st_d(T1, A5, -16); + __ st_d(T2, A5, -24); + __ st_d(T3, A5, -32); + __ addi_d(A5, A5, -32); + + __ bind(le16); + __ addi_d(A4, A0, 16); + __ bgeu(A4, A2, le8); + __ ld_d(T0, A2, -8); + __ ld_d(T1, A2, -16); + __ addi_d(A2, A2, -16); + __ st_d(T0, A5, -8); + __ st_d(T1, A5, -16); + __ addi_d(A5, A5, -16); + + __ bind(le8); + __ addi_d(A4, A0, 8); + __ bgeu(A4, A2, lt8); + __ ld_d(T0, A2, -8); + __ st_d(T0, A5, -8); + + __ bind(lt8); + __ st_d(A6, A1, 0); + __ st_d(A7, A3, -8); + __ jr(RA); + } + + // Byte small copy: less than 9 elements. + void generate_byte_small_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_b(AT, A0, 0); + __ st_b(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_h(AT, A0, 0); + __ st_h(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_h(AT, A0, 0); + __ ld_b(A2, A0, 2); + __ st_h(AT, A1, 0); + __ st_b(A2, A1, 2); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 4: + __ ld_w(AT, A0, 0); + __ st_w(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ ld_w(AT, A0, 0); + __ ld_b(A2, A0, 4); + __ st_w(AT, A1, 0); + __ st_b(A2, A1, 4); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 6: + __ ld_w(AT, A0, 0); + __ ld_h(A2, A0, 4); + __ st_w(AT, A1, 0); + __ st_h(A2, A1, 4); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 7: + __ ld_w(AT, A0, 0); + __ ld_w(A2, A0, 3); + __ st_w(AT, A1, 0); + __ st_w(A2, A1, 3); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 8: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ jr(RA); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_byte_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_byte_copy(). 
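+ // Dispatch (in C, approximately; a sketch of the control flow only):
+ //   if (count < 9) goto small;  // per-count move table
+ //   goto large;                 // shared bulk-copy loop, count is already in bytes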
+ // + address generate_disjoint_byte_copy(bool aligned, Label &small, Label &large, + const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + __ b(large); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_byte_copy(bool aligned, Label &small, Label &large, + const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + array_overlap_test(StubRoutines::jbyte_disjoint_arraycopy(), 0); + + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + __ b(large); + + return start; + } + + // Short small copy: less than 9 elements. + void generate_short_small_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_h(AT, A0, 0); + __ st_h(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_w(AT, A0, 0); + __ st_w(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_w(AT, A0, 0); + __ ld_h(A2, A0, 4); + __ st_w(AT, A1, 0); + __ st_h(A2, A1, 4); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 4: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ ld_d(AT, A0, 0); + __ ld_h(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_h(A2, A1, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 6: + __ ld_d(AT, A0, 0); + __ ld_w(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_w(A2, A1, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 7: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 6); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 6); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 8: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ jr(RA); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_short_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_short_copy(). 
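+ // Dispatch (in C, approximately; a sketch of the control flow only):
+ //   if (count < 9) goto small;  // per-count move table
+ //   count <<= 1;                // elements -> bytes
+ //   goto large;                 // shared bulk-copy loop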
+ // + address generate_disjoint_short_copy(bool aligned, Label &small, Label &large, + const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + __ slli_d(A2, A2, 1); + __ b(large); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we + // let the hardware handle it. The two or four words within dwords + // or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_short_copy(bool aligned, Label &small, Label &large, + const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + array_overlap_test(StubRoutines::jshort_disjoint_arraycopy(), 1); + + __ sltui(T0, A2, 9); + __ bnez(T0, small); + + __ slli_d(A2, A2, 1); + __ b(large); + + return start; + } + + // Short small copy: less than 7 elements. + void generate_int_small_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_w(AT, A0, 0); + __ st_w(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_d(AT, A0, 0); + __ ld_w(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_w(A2, A1, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 4: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 5: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ ld_w(A3, A0, 16); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ st_w(A3, A1, 16); + __ jr(RA); + __ nop(); + + // 6: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ ld_d(A3, A0, 16); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ st_d(A3, A1, 16); + __ jr(RA); + } + + // Generate maybe oop copy + void gen_maybe_oop_copy(bool is_oop, Label &small, Label &large, + const char *name, int small_limit, int log2_elem_size, + bool dest_uninitialized = false) { + Label post, _large; + + if (is_oop) { + __ addi_d(SP, SP, -4 * wordSize); + __ st_d(A2, SP, 3 * wordSize); + __ st_d(A1, SP, 2 * wordSize); + __ st_d(A0, SP, 1 * wordSize); + __ st_d(RA, SP, 0 * wordSize); + + gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); + + __ ld_d(A2, SP, 3 * wordSize); + __ ld_d(A1, SP, 2 * wordSize); + __ ld_d(A0, SP, 1 * wordSize); + } + + __ sltui(T0, A2, small_limit); + if (is_oop) { + __ beqz(T0, _large); + __ bl(small); + __ b(post); + } else { + __ bnez(T0, small); + } + + __ bind(_large); + __ slli_d(A2, A2, log2_elem_size); + + if (is_oop) { + __ bl(large); + } else { + __ b(large); + } + + if (is_oop) { + __ bind(post); + __ ld_d(A2, SP, 3 * wordSize); + __ ld_d(A1, SP, 2 * wordSize); + + gen_write_ref_array_post_barrier(A1, A2, T1); + + __ ld_d(RA, SP, 0 
* wordSize); + __ addi_d(SP, SP, 4 * wordSize); + __ jr(RA); + } + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, const char *name, + bool dest_uninitialized = false) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + gen_maybe_oop_copy(is_oop, small, large, name, 7, 2, dest_uninitialized); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, + Label &small, Label &large, const char *name, + bool dest_uninitialized = false) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + if (is_oop) { + array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 2); + } else { + array_overlap_test(StubRoutines::jint_disjoint_arraycopy(), 2); + } + + gen_maybe_oop_copy(is_oop, small, large, name, 7, 2, dest_uninitialized); + + return start; + } + + // Long small copy: less than 4 elements. + void generate_long_small_copy(Label &entry, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Label L; + __ bind(entry); + __ lipc(AT, L); + __ slli_d(A2, A2, 5); + __ add_d(AT, AT, A2); + __ jr(AT); + + __ bind(L); + // 0: + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 1: + __ ld_d(AT, A0, 0); + __ st_d(AT, A1, 0); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + + // 2: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ jr(RA); + __ nop(); + __ nop(); + __ nop(); + + // 3: + __ ld_d(AT, A0, 0); + __ ld_d(A2, A0, 8); + __ ld_d(A3, A0, 16); + __ st_d(AT, A1, 0); + __ st_d(A2, A1, 8); + __ st_d(A3, A1, 16); + __ jr(RA); + __ nop(); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. 
The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, const char *name, + bool dest_uninitialized = false) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + gen_maybe_oop_copy(is_oop, small, large, name, 4, 3, dest_uninitialized); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, Label &small, + Label &large, const char *name, + bool dest_uninitialized = false) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + if (is_oop) { + array_overlap_test(StubRoutines::oop_disjoint_arraycopy(), 3); + } else { + array_overlap_test(StubRoutines::jlong_disjoint_arraycopy(), 3); + } + + gen_maybe_oop_copy(is_oop, small, large, name, 4, 3, dest_uninitialized); + + return start; + } + + void generate_arraycopy_stubs() { + Label disjoint_large_copy, conjoint_large_copy; + Label byte_small_copy, short_small_copy, int_small_copy, long_small_copy; + + generate_disjoint_large_copy(disjoint_large_copy, "disjoint_large_copy"); + generate_conjoint_large_copy(conjoint_large_copy, "conjoint_large_copy"); + generate_byte_small_copy(byte_small_copy, "jbyte_small_copy"); + generate_short_small_copy(short_small_copy, "jshort_small_copy"); + generate_int_small_copy(int_small_copy, "jint_small_copy"); + generate_long_small_copy(long_small_copy, "jlong_small_copy"); + + if (UseCompressedOops) { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, int_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, int_small_copy, conjoint_large_copy, "oop_arraycopy_uninit", true); + } else { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, long_small_copy, disjoint_large_copy, "oop_disjoint_arraycopy_uninit", true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, long_small_copy, conjoint_large_copy, "oop_arraycopy_uninit", true); + } + + 
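+ // With compressed oops each element is a 4-byte narrow oop, so the oop stubs
+ // share the int copy generators; with full-width oops they share the long
+ // (8-byte) generators.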
StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, byte_small_copy, disjoint_large_copy, "jbyte_disjoint_arraycopy"); + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, short_small_copy, disjoint_large_copy, "jshort_disjoint_arraycopy"); + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, int_small_copy, disjoint_large_copy, "jint_disjoint_arraycopy"); + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, long_small_copy, disjoint_large_copy, "jlong_disjoint_arraycopy", false); + + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, byte_small_copy, conjoint_large_copy, "jbyte_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, short_small_copy, conjoint_large_copy, "jshort_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, int_small_copy, conjoint_large_copy, "jint_arraycopy"); + StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, long_small_copy, conjoint_large_copy, "jlong_arraycopy", false); + + // We don't generate specialized code for HeapWord-aligned source + // arrays, so just use the code we've already generated + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; + StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; + + StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; + StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; + + StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; + StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; + + StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; + StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; + StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; + StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; + + StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); + StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); + StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); + StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); + StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); + StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); + } + + // Arguments: + // + // Inputs: + // A0 - source byte array address + // A1 - destination byte array address + // A2 - K (key) in little endian int array + // A3 - r vector byte array address + // A4 - input length + // + // Output: + // A0 - input length + // + address generate_aescrypt_encryptBlock(bool cbc) { + static const uint32_t ft_consts[256] = { + 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, + 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, + 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, + 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, + 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, + 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, + 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, + 0x239c9cbf, 0x53a4a4f7, 
0xe4727296, 0x9bc0c05b, + 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, + 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, + 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, + 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, + 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, + 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, + 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, + 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, + 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, + 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, + 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, + 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, + 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, + 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, + 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, + 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, + 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, + 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, + 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, + 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, + 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, + 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, + 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, + 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, + 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, + 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, + 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, + 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, + 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, + 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, + 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, + 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, + 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, + 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, + 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, + 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, + 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, + 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, + 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, + 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, + 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, + 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, + 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, + 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, + 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, + 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, + 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, + 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, + 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, + 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, + 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, + 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, + 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, + 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, + 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, + 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a + }; + static const uint8_t fsb_consts[256] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, + 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, + 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, + 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, + 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, + 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, + 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, + 0x45, 0xf9, 0x02, 0x7f, 0x50, 
0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, + 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, + 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, + 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, + 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, + 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, + 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, + 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, + 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, + 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + }; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + + // Allocate registers + Register src = A0; + Register dst = A1; + Register key = A2; + Register rve = A3; + Register srclen = A4; + Register keylen = T8; + Register srcend = A5; + Register keyold = A6; + Register t0 = A7; + Register t1, t2, t3, ftp; + Register xa[4] = { T0, T1, T2, T3 }; + Register ya[4] = { T4, T5, T6, T7 }; + + Label loop, tail, done; + address start = __ pc(); + + if (cbc) { + t1 = S0; + t2 = S1; + t3 = S2; + ftp = S3; + + __ beqz(srclen, done); + + __ addi_d(SP, SP, -4 * wordSize); + __ st_d(S3, SP, 3 * wordSize); + __ st_d(S2, SP, 2 * wordSize); + __ st_d(S1, SP, 1 * wordSize); + __ st_d(S0, SP, 0 * wordSize); + + __ add_d(srcend, src, srclen); + __ move(keyold, key); + } else { + t1 = A3; + t2 = A4; + t3 = A5; + ftp = A6; + } + + __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); + + // Round 1 + if (cbc) { + for (int i = 0; i < 4; i++) { + __ ld_w(xa[i], rve, 4 * i); + } + + __ bind(loop); + + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], src, 4 * i); + } + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + } else { + for (int i = 0; i < 4; i++) { + __ ld_w(xa[i], src, 4 * i); + } + } + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], key, 4 * i); + } + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + + __ li(ftp, (intptr_t)ft_consts); + + // Round 2 - (N-1) + for (int r = 0; r < 14; r++) { + Register *xp; + Register *yp; + + if (r & 1) { + xp = xa; + yp = ya; + } else { + xp = ya; + yp = xa; + } + + for (int i = 0; i < 4; i++) { + __ ld_w(xp[i], key, 4 * (4 * (r + 1) + i)); + } + + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, yp[(i + 3) & 3], 7, 0); + __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, yp[(i + 1) & 3], 23, 16); + __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); + __ slli_w(t0, t0, 2); + __ slli_w(t1, t1, 2); + __ slli_w(t2, t2, 2); + __ slli_w(t3, t3, 2); + __ ldx_w(t0, ftp, t0); + __ ldx_w(t1, ftp, t1); + __ ldx_w(t2, ftp, t2); + __ ldx_w(t3, ftp, t3); + __ rotri_w(t0, t0, 24); + __ rotri_w(t1, t1, 16); + __ rotri_w(t2, t2, 8); + __ XOR(xp[i], xp[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xp[i], xp[i], t3); + __ XOR(xp[i], xp[i], t0); + } + + if (r == 8) { + // AES 128 + __ li(t0, 44); + __ beq(t0, keylen, tail); + } else if (r == 10) { + // AES 192 + __ li(t0, 52); + __ beq(t0, keylen, tail); + } + } + + __ 
bind(tail); + __ li(ftp, (intptr_t)fsb_consts); + __ alsl_d(key, keylen, key, 2 - 1); + + // Round N + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, ya[(i + 3) & 3], 7, 0); + __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, ya[(i + 1) & 3], 23, 16); + __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); + __ ldx_bu(t0, ftp, t0); + __ ldx_bu(t1, ftp, t1); + __ ldx_bu(t2, ftp, t2); + __ ldx_bu(t3, ftp, t3); + __ ld_w(xa[i], key, 4 * i - 16); + __ slli_w(t1, t1, 8); + __ slli_w(t2, t2, 16); + __ slli_w(t3, t3, 24); + __ XOR(xa[i], xa[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xa[i], xa[i], t3); + __ XOR(xa[i], xa[i], t0); + } + + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + for (int i = 0; i < 4; i++) { + __ st_w(xa[i], dst, 4 * i); + } + + if (cbc) { + __ move(key, keyold); + __ addi_d(src, src, 16); + __ addi_d(dst, dst, 16); + __ blt(src, srcend, loop); + + for (int i = 0; i < 4; i++) { + __ st_w(xa[i], rve, 4 * i); + } + + __ ld_d(S3, SP, 3 * wordSize); + __ ld_d(S2, SP, 2 * wordSize); + __ ld_d(S1, SP, 1 * wordSize); + __ ld_d(S0, SP, 0 * wordSize); + __ addi_d(SP, SP, 4 * wordSize); + + __ bind(done); + __ move(A0, srclen); + } + + __ jr(RA); + + return start; + } + + // Arguments: + // + // Inputs: + // A0 - source byte array address + // A1 - destination byte array address + // A2 - K (key) in little endian int array + // A3 - r vector byte array address + // A4 - input length + // + // Output: + // A0 - input length + // + address generate_aescrypt_decryptBlock(bool cbc) { + static const uint32_t rt_consts[256] = { + 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, + 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, + 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, + 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, + 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, + 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, + 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, + 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, + 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, + 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, + 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, + 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, + 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, + 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, + 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, + 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, + 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, + 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, + 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, + 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, + 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, + 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, + 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, + 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, + 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, + 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, + 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, + 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, + 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, + 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, + 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, + 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, + 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, + 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, + 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, + 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, + 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, + 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, + 0x2bb3166c, 
0xa970b999, 0x119448fa, 0x47e96422, + 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, + 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, + 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, + 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, + 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, + 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, + 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, + 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, + 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, + 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, + 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, + 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, + 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, + 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, + 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, + 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, + 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, + 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, + 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, + 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, + 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, + 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, + 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, + 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, + 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 + }; + static const uint8_t rsb_consts[256] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, + 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, + 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, + 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, + 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, + 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, + 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, + 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, + 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, + 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, + 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, + 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, + 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, + 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, + 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, + 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, + 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + }; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + + // Allocate registers + Register src = A0; + Register dst = A1; + Register key = A2; + Register rve = A3; + Register srclen = A4; + Register keylen = T8; + Register srcend = A5; + Register t0 = A6; + Register t1 = A7; + Register t2, t3, rtp, rvp; + Register xa[4] = { T0, T1, T2, T3 }; + Register ya[4] = { T4, T5, T6, T7 }; + + Label loop, tail, done; + address start = __ pc(); + + if (cbc) { + t2 = S0; + t3 = S1; + rtp = S2; + rvp = S3; + + __ beqz(srclen, done); + + __ addi_d(SP, SP, -4 * wordSize); + __ 
st_d(S3, SP, 3 * wordSize); + __ st_d(S2, SP, 2 * wordSize); + __ st_d(S1, SP, 1 * wordSize); + __ st_d(S0, SP, 0 * wordSize); + + __ add_d(srcend, src, srclen); + __ move(rvp, rve); + } else { + t2 = A3; + t3 = A4; + rtp = A5; + } + + __ ld_w(keylen, key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)); + + __ bind(loop); + + // Round 1 + for (int i = 0; i < 4; i++) { + __ ld_w(xa[i], src, 4 * i); + } + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], key, 4 * (4 + i)); + } + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + + __ li(rtp, (intptr_t)rt_consts); + + // Round 2 - (N-1) + for (int r = 0; r < 14; r++) { + Register *xp; + Register *yp; + + if (r & 1) { + xp = xa; + yp = ya; + } else { + xp = ya; + yp = xa; + } + + for (int i = 0; i < 4; i++) { + __ ld_w(xp[i], key, 4 * (4 * (r + 1) + 4 + i)); + } + + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, yp[(i + 1) & 3], 7, 0); + __ bstrpick_d(t1, yp[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, yp[(i + 3) & 3], 23, 16); + __ bstrpick_d(t3, yp[(i + 0) & 3], 31, 24); + __ slli_w(t0, t0, 2); + __ slli_w(t1, t1, 2); + __ slli_w(t2, t2, 2); + __ slli_w(t3, t3, 2); + __ ldx_w(t0, rtp, t0); + __ ldx_w(t1, rtp, t1); + __ ldx_w(t2, rtp, t2); + __ ldx_w(t3, rtp, t3); + __ rotri_w(t0, t0, 24); + __ rotri_w(t1, t1, 16); + __ rotri_w(t2, t2, 8); + __ XOR(xp[i], xp[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xp[i], xp[i], t3); + __ XOR(xp[i], xp[i], t0); + } + + if (r == 8) { + // AES 128 + __ li(t0, 44); + __ beq(t0, keylen, tail); + } else if (r == 10) { + // AES 192 + __ li(t0, 52); + __ beq(t0, keylen, tail); + } + } + + __ bind(tail); + __ li(rtp, (intptr_t)rsb_consts); + + // Round N + for (int i = 0; i < 4; i++) { + __ bstrpick_d(t0, ya[(i + 1) & 3], 7, 0); + __ bstrpick_d(t1, ya[(i + 2) & 3], 15, 8); + __ bstrpick_d(t2, ya[(i + 3) & 3], 23, 16); + __ bstrpick_d(t3, ya[(i + 0) & 3], 31, 24); + __ ldx_bu(t0, rtp, t0); + __ ldx_bu(t1, rtp, t1); + __ ldx_bu(t2, rtp, t2); + __ ldx_bu(t3, rtp, t3); + __ ld_w(xa[i], key, 4 * i); + __ slli_w(t1, t1, 8); + __ slli_w(t2, t2, 16); + __ slli_w(t3, t3, 24); + __ XOR(xa[i], xa[i], t0); + __ XOR(t0, t1, t2); + __ XOR(xa[i], xa[i], t3); + __ XOR(xa[i], xa[i], t0); + } + + if (cbc) { + for (int i = 0; i < 4; i++) { + __ ld_w(ya[i], rvp, 4 * i); + } + } + for (int i = 0; i < 4; i++) { + __ revb_2h(xa[i], xa[i]); + } + for (int i = 0; i < 4; i++) { + __ rotri_w(xa[i], xa[i], 16); + } + if (cbc) { + for (int i = 0; i < 4; i++) { + __ XOR(xa[i], xa[i], ya[i]); + } + } + for (int i = 0; i < 4; i++) { + __ st_w(xa[i], dst, 4 * i); + } + + if (cbc) { + __ move(rvp, src); + __ addi_d(src, src, 16); + __ addi_d(dst, dst, 16); + __ blt(src, srcend, loop); + + __ ld_d(t0, src, -16); + __ ld_d(t1, src, -8); + __ st_d(t0, rve, 0); + __ st_d(t1, rve, 8); + + __ ld_d(S3, SP, 3 * wordSize); + __ ld_d(S2, SP, 2 * wordSize); + __ ld_d(S1, SP, 1 * wordSize); + __ ld_d(S0, SP, 0 * wordSize); + __ addi_d(SP, SP, 4 * wordSize); + + __ bind(done); + __ move(A0, srclen); + } + + __ jr(RA); + + return start; + } + + // Arguments: + // + // Inputs: + // A0 - byte[] source+offset + // A1 - int[] SHA.state + // A2 - int offset + // A3 - int limit + // + void generate_sha1_implCompress(const char *name, address &entry, address &entry_mb) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + Label keys, loop; + + // Keys + __ 
bind(keys); + __ emit_int32(0x5a827999); + __ emit_int32(0x6ed9eba1); + __ emit_int32(0x8f1bbcdc); + __ emit_int32(0xca62c1d6); + + // Allocate registers + Register t0 = T5; + Register t1 = T6; + Register t2 = T7; + Register t3 = T8; + Register buf = A0; + Register state = A1; + Register ofs = A2; + Register limit = A3; + Register ka[4] = { A4, A5, A6, A7 }; + Register sa[5] = { T0, T1, T2, T3, T4 }; + + // Entry + entry = __ pc(); + __ move(ofs, R0); + __ move(limit, R0); + + // Entry MB + entry_mb = __ pc(); + + // Allocate scratch space + __ addi_d(SP, SP, -64); + + // Load keys + __ lipc(t0, keys); + __ ld_w(ka[0], t0, 0); + __ ld_w(ka[1], t0, 4); + __ ld_w(ka[2], t0, 8); + __ ld_w(ka[3], t0, 12); + + __ bind(loop); + // Load arguments + __ ld_w(sa[0], state, 0); + __ ld_w(sa[1], state, 4); + __ ld_w(sa[2], state, 8); + __ ld_w(sa[3], state, 12); + __ ld_w(sa[4], state, 16); + + // 80 rounds of hashing + for (int i = 0; i < 80; i++) { + Register a = sa[(5 - (i % 5)) % 5]; + Register b = sa[(6 - (i % 5)) % 5]; + Register c = sa[(7 - (i % 5)) % 5]; + Register d = sa[(8 - (i % 5)) % 5]; + Register e = sa[(9 - (i % 5)) % 5]; + + if (i < 16) { + __ ld_w(t0, buf, i * 4); + __ revb_2h(t0, t0); + __ rotri_w(t0, t0, 16); + __ add_w(e, e, t0); + __ st_w(t0, SP, i * 4); + __ XOR(t0, c, d); + __ AND(t0, t0, b); + __ XOR(t0, t0, d); + } else { + __ ld_w(t0, SP, ((i - 3) & 0xF) * 4); + __ ld_w(t1, SP, ((i - 8) & 0xF) * 4); + __ ld_w(t2, SP, ((i - 14) & 0xF) * 4); + __ ld_w(t3, SP, ((i - 16) & 0xF) * 4); + __ XOR(t0, t0, t1); + __ XOR(t0, t0, t2); + __ XOR(t0, t0, t3); + __ rotri_w(t0, t0, 31); + __ add_w(e, e, t0); + __ st_w(t0, SP, (i & 0xF) * 4); + + if (i < 20) { + __ XOR(t0, c, d); + __ AND(t0, t0, b); + __ XOR(t0, t0, d); + } else if (i < 40 || i >= 60) { + __ XOR(t0, b, c); + __ XOR(t0, t0, d); + } else if (i < 60) { + __ OR(t0, c, d); + __ AND(t0, t0, b); + __ AND(t2, c, d); + __ OR(t0, t0, t2); + } + } + + __ rotri_w(b, b, 2); + __ add_w(e, e, t0); + __ add_w(e, e, ka[i / 20]); + __ rotri_w(t0, a, 27); + __ add_w(e, e, t0); + } + + // Save updated state + __ ld_w(t0, state, 0); + __ ld_w(t1, state, 4); + __ ld_w(t2, state, 8); + __ ld_w(t3, state, 12); + __ add_w(sa[0], sa[0], t0); + __ ld_w(t0, state, 16); + __ add_w(sa[1], sa[1], t1); + __ add_w(sa[2], sa[2], t2); + __ add_w(sa[3], sa[3], t3); + __ add_w(sa[4], sa[4], t0); + __ st_w(sa[0], state, 0); + __ st_w(sa[1], state, 4); + __ st_w(sa[2], state, 8); + __ st_w(sa[3], state, 12); + __ st_w(sa[4], state, 16); + + __ addi_w(ofs, ofs, 64); + __ addi_d(buf, buf, 64); + __ bge(limit, ofs, loop); + __ move(V0, ofs); // return ofs + + __ addi_d(SP, SP, 64); + __ jr(RA); + } + + // Arguments: + // + // Inputs: + // A0 - byte[] source+offset + // A1 - int[] SHA.state + // A2 - int offset + // A3 - int limit + // + void generate_sha256_implCompress(const char *name, address &entry, address &entry_mb) { + static const uint32_t round_consts[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, + }; + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + Label loop; + + // Allocate registers + Register t0 = A4; + Register t1 = A5; + Register t2 = A6; + Register t3 = A7; + Register buf = A0; + Register state = A1; + Register ofs = A2; + Register limit = A3; + Register kptr = T8; + Register sa[8] = { T0, T1, T2, T3, T4, T5, T6, T7 }; + + // Entry + entry = __ pc(); + __ move(ofs, R0); + __ move(limit, R0); + + // Entry MB + entry_mb = __ pc(); + + // Allocate scratch space + __ addi_d(SP, SP, -64); + + // Load keys base address + __ li(kptr, (intptr_t)round_consts); + + __ bind(loop); + // Load state + __ ld_w(sa[0], state, 0); + __ ld_w(sa[1], state, 4); + __ ld_w(sa[2], state, 8); + __ ld_w(sa[3], state, 12); + __ ld_w(sa[4], state, 16); + __ ld_w(sa[5], state, 20); + __ ld_w(sa[6], state, 24); + __ ld_w(sa[7], state, 28); + + // Do 64 rounds of hashing + for (int i = 0; i < 64; i++) { + Register a = sa[(0 - i) & 7]; + Register b = sa[(1 - i) & 7]; + Register c = sa[(2 - i) & 7]; + Register d = sa[(3 - i) & 7]; + Register e = sa[(4 - i) & 7]; + Register f = sa[(5 - i) & 7]; + Register g = sa[(6 - i) & 7]; + Register h = sa[(7 - i) & 7]; + + if (i < 16) { + __ ld_w(t1, buf, i * 4); + __ revb_2h(t1, t1); + __ rotri_w(t1, t1, 16); + } else { + __ ld_w(t0, SP, ((i - 15) & 0xF) * 4); + __ ld_w(t1, SP, ((i - 16) & 0xF) * 4); + __ ld_w(t2, SP, ((i - 7) & 0xF) * 4); + __ add_w(t1, t1, t2); + __ rotri_w(t2, t0, 18); + __ srli_w(t3, t0, 3); + __ rotri_w(t0, t0, 7); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ add_w(t1, t1, t0); + __ ld_w(t0, SP, ((i - 2) & 0xF) * 4); + __ rotri_w(t2, t0, 19); + __ srli_w(t3, t0, 10); + __ rotri_w(t0, t0, 17); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ add_w(t1, t1, t0); + } + + __ rotri_w(t2, e, 11); + __ rotri_w(t3, e, 25); + __ rotri_w(t0, e, 6); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ XOR(t2, g, f); + __ ld_w(t3, kptr, i * 4); + __ AND(t2, t2, e); + __ XOR(t2, t2, g); + __ add_w(t0, t0, t2); + __ add_w(t0, t0, t3); + __ add_w(h, h, t1); + __ add_w(h, h, t0); + __ add_w(d, d, h); + __ rotri_w(t2, a, 13); + __ rotri_w(t3, a, 22); + __ rotri_w(t0, a, 2); + __ XOR(t2, t2, t3); + __ XOR(t0, t0, t2); + __ add_w(h, h, t0); + __ OR(t0, c, b); + __ AND(t2, c, b); + __ AND(t0, t0, a); + __ OR(t0, t0, t2); + __ add_w(h, h, t0); + __ st_w(t1, SP, (i & 0xF) * 4); + } + + // Add to state + __ ld_w(t0, state, 0); + __ ld_w(t1, state, 4); + __ ld_w(t2, state, 8); + __ ld_w(t3, state, 12); + __ add_w(sa[0], sa[0], t0); + __ add_w(sa[1], sa[1], t1); + __ add_w(sa[2], sa[2], t2); + __ add_w(sa[3], sa[3], t3); + __ ld_w(t0, state, 16); + __ ld_w(t1, state, 20); + __ ld_w(t2, state, 24); + __ ld_w(t3, state, 28); + __ add_w(sa[4], sa[4], t0); + __ add_w(sa[5], sa[5], t1); + __ add_w(sa[6], sa[6], t2); + __ add_w(sa[7], sa[7], t3); + __ st_w(sa[0], state, 0); + __ st_w(sa[1], state, 4); + __ st_w(sa[2], state, 8); + __ st_w(sa[3], state, 12); + __ st_w(sa[4], state, 16); + __ st_w(sa[5], state, 20); + __ st_w(sa[6], state, 24); + __ st_w(sa[7], state, 28); + + __ addi_w(ofs, ofs, 64); + __ addi_d(buf, buf, 64); + __ bge(limit, ofs, loop); + __ move(V0, ofs); // return ofs + + __ addi_d(SP, SP, 64); + __ jr(RA); + } + + // Do NOT delete this node which stands for stub routine placeholder + address generate_updateBytesCRC32() { + 
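+ // Folds 'len' bytes at 'buf' into the incoming CRC-32 value in 'crc' and
+ // returns the updated CRC; the byte/word loop itself is emitted by
+ // MacroAssembler::kernel_crc32 below.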
assert(UseCRC32Intrinsics, "need CRC32 instructions support"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); + + address start = __ pc(); + + const Register crc = A0; // crc + const Register buf = A1; // source java byte array address + const Register len = A2; // length + const Register tmp = A3; + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ kernel_crc32(crc, buf, len, tmp); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ jr(RA); + + return start; + } + + // add a function to implement SafeFetch32 and SafeFetchN + void generate_safefetch(const char* name, int size, address* entry, + address* fault_pc, address* continuation_pc) { + // safefetch signatures: + // int SafeFetch32(int* adr, int errValue); + // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); + // + // arguments: + // A0 = adr + // A1 = errValue + // + // result: + // PPC_RET = *adr or errValue + StubCodeMark mark(this, "StubRoutines", name); + + // Entry point, pc or function descriptor. + *entry = __ pc(); + + // Load *adr into A1, may fault. + *fault_pc = __ pc(); + switch (size) { + case 4: + // int32_t + __ ld_w(A1, A0, 0); + break; + case 8: + // int64_t + __ ld_d(A1, A0, 0); + break; + default: + ShouldNotReachHere(); + } + + // return errValue or *adr + *continuation_pc = __ pc(); + __ add_d(V0, A1, R0); + __ jr(RA); + } + + +#undef __ +#define __ masm-> + + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Since we need to preserve callee-saved values (currently + // only for C2, but done for C1 as well) we need a callee-saved oop + // map and therefore have to make these stubs into RuntimeStubs + // rather than BufferBlobs. If the compiler needs all registers to + // be preserved between the fault point and the exception handler + // then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + address generate_throw_exception(const char* name, + address runtime_entry, + bool restore_saved_exception_pc) { + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved. 
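+ // Frame slots laid out below, from lowest address up: the thread
+ // (last_java_sp) slot, callee-saved S7..S0, the saved FP and the return
+ // address.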
+ enum layout { + thread_off, // last_java_sp + S7_off, // callee saved register sp + 1 + S6_off, // callee saved register sp + 2 + S5_off, // callee saved register sp + 3 + S4_off, // callee saved register sp + 4 + S3_off, // callee saved register sp + 5 + S2_off, // callee saved register sp + 6 + S1_off, // callee saved register sp + 7 + S0_off, // callee saved register sp + 8 + FP_off, + ret_address, + framesize + }; + + int insts_size = 2048; + int locs_size = 32; + + // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, + // NULL, NULL, NULL, false, NULL, name, false); + CodeBuffer code (name , insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + + address start = __ pc(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM +#ifndef OPT_THREAD + Register java_thread = TREG; + __ get_thread(java_thread); +#else + Register java_thread = TREG; +#endif + if (restore_saved_exception_pc) { + __ ld_d(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); + } + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ addi_d(SP, SP, (-1) * (framesize-2) * wordSize); // prolog + __ st_d(S0, SP, S0_off * wordSize); + __ st_d(S1, SP, S1_off * wordSize); + __ st_d(S2, SP, S2_off * wordSize); + __ st_d(S3, SP, S3_off * wordSize); + __ st_d(S4, SP, S4_off * wordSize); + __ st_d(S5, SP, S5_off * wordSize); + __ st_d(S6, SP, S6_off * wordSize); + __ st_d(S7, SP, S7_off * wordSize); + + int frame_complete = __ pc() - start; + // push java thread (becomes first argument of C function) + __ st_d(java_thread, SP, thread_off * wordSize); + if (java_thread != A0) + __ move(A0, java_thread); + + // Set up last_Java_sp and last_Java_fp + Label before_call; + address the_pc = __ pc(); + __ bind(before_call); + __ set_last_Java_frame(java_thread, SP, FP, before_call); + // Align stack + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + // Call runtime + // TODO: confirm reloc + __ call(runtime_entry, relocInfo::runtime_call_type); + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + oop_maps->add_gc_map(the_pc - start, map); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. +#ifndef OPT_THREAD + __ get_thread(java_thread); +#endif + + __ ld_d(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + __ reset_last_Java_frame(java_thread, true); + + // Restore callee save registers. 
This must be done after resetting the Java frame + __ ld_d(S0, SP, S0_off * wordSize); + __ ld_d(S1, SP, S1_off * wordSize); + __ ld_d(S2, SP, S2_off * wordSize); + __ ld_d(S3, SP, S3_off * wordSize); + __ ld_d(S4, SP, S4_off * wordSize); + __ ld_d(S5, SP, S5_off * wordSize); + __ ld_d(S6, SP, S6_off * wordSize); + __ ld_d(S7, SP, S7_off * wordSize); + + // discard arguments + __ move(SP, FP); // epilog + __ pop(FP); + // check for pending exceptions +#ifdef ASSERT + Label L; + __ ld_d(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ should_not_reach_here(); + __ bind(L); +#endif //ASSERT + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + + RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, + &code, + frame_complete, + framesize, + oop_maps, false); + return stub->entry_point(); + } + + class MontgomeryMultiplyGenerator : public MacroAssembler { + + Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Rlen2, Ra, Rb, Rm, + Rn, Iam, Ibn, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj; + + bool _squaring; + + public: + MontgomeryMultiplyGenerator (Assembler *as, bool squaring) + : MacroAssembler(as->code()), _squaring(squaring) { + + // Register allocation + + Register reg = A0; + Pa_base = reg; // Argument registers: + if (squaring) + Pb_base = Pa_base; + else + Pb_base = ++reg; + Pn_base = ++reg; + Rlen = ++reg; + inv = ++reg; + Rlen2 = inv; // Reuse inv + Pm_base = ++reg; + + // Working registers: + Ra = ++reg; // The current digit of a, b, n, and m. + Rb = ++reg; + Rm = ++reg; + Rn = ++reg; + + Iam = ++reg; // Index to the current/next digit of a, b, n, and m. + Ibn = ++reg; + + t0 = ++reg; // Three registers which form a + t1 = ++reg; // triple-precision accumuator. + t2 = ++reg; + + Ri = ++reg; // Inner and outer loop indexes. + Rj = ++reg; + + if (squaring) { + Rhi_ab = ++reg; // Product registers: low and high parts + reg = S0; + Rlo_ab = ++reg; // of a*b and m*n. + } else { + reg = S0; + Rhi_ab = reg; // Product registers: low and high parts + Rlo_ab = ++reg; // of a*b and m*n. 
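+ // When not squaring, Pb_base claimed one more argument register above, so
+ // both halves of the a*b product (Rhi_ab/Rlo_ab) must live in callee-saved
+ // S0/S1 (see save_regs()/restore_regs()).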
+ } + + Rhi_mn = ++reg; + Rlo_mn = ++reg; + } + + private: + void enter() { + addi_d(SP, SP, -6 * wordSize); + st_d(FP, SP, 0 * wordSize); + move(FP, SP); + } + + void leave() { + addi_d(T0, FP, 6 * wordSize); + ld_d(FP, FP, 0 * wordSize); + move(SP, T0); + } + + void save_regs() { + if (!_squaring) + st_d(Rhi_ab, FP, 5 * wordSize); + st_d(Rlo_ab, FP, 4 * wordSize); + st_d(Rhi_mn, FP, 3 * wordSize); + st_d(Rlo_mn, FP, 2 * wordSize); + st_d(Pm_base, FP, 1 * wordSize); + } + + void restore_regs() { + if (!_squaring) + ld_d(Rhi_ab, FP, 5 * wordSize); + ld_d(Rlo_ab, FP, 4 * wordSize); + ld_d(Rhi_mn, FP, 3 * wordSize); + ld_d(Rlo_mn, FP, 2 * wordSize); + ld_d(Pm_base, FP, 1 * wordSize); + } + + template + void unroll_2(Register count, T block, Register tmp) { + Label loop, end, odd; + andi(tmp, count, 1); + bnez(tmp, odd); + beqz(count, end); + align(16); + bind(loop); + (this->*block)(); + bind(odd); + (this->*block)(); + addi_w(count, count, -2); + blt(R0, count, loop); + bind(end); + } + + template + void unroll_2(Register count, T block, Register d, Register s, Register tmp) { + Label loop, end, odd; + andi(tmp, count, 1); + bnez(tmp, odd); + beqz(count, end); + align(16); + bind(loop); + (this->*block)(d, s, tmp); + bind(odd); + (this->*block)(d, s, tmp); + addi_w(count, count, -2); + blt(R0, count, loop); + bind(end); + } + + void acc(Register Rhi, Register Rlo, + Register t0, Register t1, Register t2, Register t, Register c) { + add_d(t0, t0, Rlo); + OR(t, t1, Rhi); + sltu(c, t0, Rlo); + add_d(t1, t1, Rhi); + add_d(t1, t1, c); + sltu(c, t1, t); + add_d(t2, t2, c); + } + + void pre1(Register i) { + block_comment("pre1"); + // Iam = 0; + // Ibn = i; + + slli_w(Ibn, i, LogBytesPerWord); + + // Ra = Pa_base[Iam]; + // Rb = Pb_base[Ibn]; + // Rm = Pm_base[Iam]; + // Rn = Pn_base[Ibn]; + + ld_d(Ra, Pa_base, 0); + ldx_d(Rb, Pb_base, Ibn); + ld_d(Rm, Pm_base, 0); + ldx_d(Rn, Pn_base, Ibn); + + move(Iam, R0); + + // Zero the m*n result. + move(Rhi_mn, R0); + move(Rlo_mn, R0); + } + + // The core multiply-accumulate step of a Montgomery + // multiplication. The idea is to schedule operations as a + // pipeline so that instructions with long latencies (loads and + // multiplies) have time to complete before their results are + // used. This most benefits in-order implementations of the + // architecture but out-of-order ones also benefit. + void step() { + block_comment("step"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + addi_d(Iam, Iam, wordSize); + addi_d(Ibn, Ibn, -wordSize); + mul_d(Rlo_ab, Ra, Rb); + mulh_du(Rhi_ab, Ra, Rb); + acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n from the + // previous iteration. 
+ ldx_d(Ra, Pa_base, Iam); + ldx_d(Rb, Pb_base, Ibn); + + // MACC(Rm, Rn, t0, t1, t2); + // Rm = Pm_base[Iam]; + // Rn = Pn_base[Ibn]; + mul_d(Rlo_mn, Rm, Rn); + mulh_du(Rhi_mn, Rm, Rn); + acc(Rhi_ab, Rlo_ab, t0, t1, t2, Rm, Rn); + ldx_d(Rm, Pm_base, Iam); + ldx_d(Rn, Pn_base, Ibn); + } + + void post1() { + block_comment("post1"); + + // MACC(Ra, Rb, t0, t1, t2); + mul_d(Rlo_ab, Ra, Rb); + mulh_du(Rhi_ab, Ra, Rb); + acc(Rhi_mn, Rlo_mn, t0, t1, t2, Ra, Rb); // The pending m*n + acc(Rhi_ab, Rlo_ab, t0, t1, t2, Ra, Rb); + + // Pm_base[Iam] = Rm = t0 * inv; + mul_d(Rm, t0, inv); + stx_d(Rm, Pm_base, Iam); + + // MACC(Rm, Rn, t0, t1, t2); + // t0 = t1; t1 = t2; t2 = 0; + mulh_du(Rhi_mn, Rm, Rn); + +#ifndef PRODUCT + // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply"); + { + mul_d(Rlo_mn, Rm, Rn); + add_d(Rlo_mn, t0, Rlo_mn); + Label ok; + beqz(Rlo_mn, ok); { + stop("broken Montgomery multiply"); + } bind(ok); + } +#endif + + // We have very carefully set things up so that + // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate + // the lower half of Rm * Rn because we know the result already: + // it must be -t0. t0 + (-t0) must generate a carry iff + // t0 != 0. So, rather than do a mul and an adds we just set + // the carry flag iff t0 is nonzero. + // + // mul_d(Rlo_mn, Rm, Rn); + // add_d(t0, t0, Rlo_mn); + OR(Ra, t1, Rhi_mn); + sltu(Rb, R0, t0); + add_d(t0, t1, Rhi_mn); + add_d(t0, t0, Rb); + sltu(Rb, t0, Ra); + add_d(t1, t2, Rb); + move(t2, R0); + } + + void pre2(Register i, Register len) { + block_comment("pre2"); + + // Rj == i-len + sub_w(Rj, i, len); + + // Iam = i - len; + // Ibn = len; + slli_w(Iam, Rj, LogBytesPerWord); + slli_w(Ibn, len, LogBytesPerWord); + + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + addi_d(Iam, Iam, wordSize); + addi_d(Ibn, Ibn, -wordSize); + + ldx_d(Ra, Pa_base, Iam); + ldx_d(Rb, Pb_base, Ibn); + ldx_d(Rm, Pm_base, Iam); + ldx_d(Rn, Pn_base, Ibn); + + move(Rhi_mn, R0); + move(Rlo_mn, R0); + } + + void post2(Register i, Register len) { + block_comment("post2"); + + sub_w(Rj, i, len); + slli_w(Iam, Rj, LogBytesPerWord); + + add_d(t0, t0, Rlo_mn); // The pending m*n, low part + + // As soon as we know the least significant digit of our result, + // store it. + // Pm_base[i-len] = t0; + stx_d(t0, Pm_base, Iam); + + // t0 = t1; t1 = t2; t2 = 0; + OR(Ra, t1, Rhi_mn); + sltu(Rb, t0, Rlo_mn); + add_d(t0, t1, Rhi_mn); // The pending m*n, high part + add_d(t0, t0, Rb); + sltu(Rb, t0, Ra); + add_d(t1, t2, Rb); + move(t2, R0); + } + + // A carry in t0 after Montgomery multiplication means that we + // should subtract multiples of n from our result in m. We'll + // keep doing that until there is no carry. + void normalize(Register len) { + block_comment("normalize"); + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + Label loop, post, again; + Register cnt = t1, i = t2, b = Ra, t = Rb; // Re-use registers; we're done with them now + beqz(t0, post); { + bind(again); { + move(i, R0); + move(b, R0); + slli_w(cnt, len, LogBytesPerWord); + align(16); + bind(loop); { + ldx_d(Rm, Pm_base, i); + ldx_d(Rn, Pn_base, i); + sltu(t, Rm, b); + sub_d(Rm, Rm, b); + sltu(b, Rm, Rn); + sub_d(Rm, Rm, Rn); + OR(b, b, t); + stx_d(Rm, Pm_base, i); + addi_w(i, i, BytesPerWord); + } blt(i, cnt, loop); + sub_d(t0, t0, b); + } bnez(t0, again); + } bind(post); + } + + // Move memory at s to d, reversing words. 
+ // Increments d to end of copied memory + // Destroys tmp1, tmp2, tmp3 + // Preserves len + // Leaves s pointing to the address which was in d at start + void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) { + assert(tmp1 < S0 && tmp2 < S0, "register corruption"); + + alsl_d(s, len, s, LogBytesPerWord - 1); + move(tmp1, len); + unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2); + slli_w(s, len, LogBytesPerWord); + sub_d(s, d, s); + } + + // where + void reverse1(Register d, Register s, Register tmp) { + ld_d(tmp, s, -wordSize); + addi_d(s, s, -wordSize); + addi_d(d, d, wordSize); + rotri_d(tmp, tmp, 32); + st_d(tmp, d, -wordSize); + } + + public: + /** + * Fast Montgomery multiplication. The derivation of the + * algorithm is in A Cryptographic Library for the Motorola + * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. + * + * Arguments: + * + * Inputs for multiplication: + * A0 - int array elements a + * A1 - int array elements b + * A2 - int array elements n (the modulus) + * A3 - int length + * A4 - int inv + * A5 - int array elements m (the result) + * + * Inputs for squaring: + * A0 - int array elements a + * A1 - int array elements n (the modulus) + * A2 - int length + * A3 - int inv + * A4 - int array elements m (the result) + * + */ + address generate_multiply() { + Label argh, nothing; + bind(argh); + stop("MontgomeryMultiply total_allocation must be <= 8192"); + + align(CodeEntryAlignment); + address entry = pc(); + + beqz(Rlen, nothing); + + enter(); + + // Make room. + sltui(Ra, Rlen, 513); + beqz(Ra, argh); + slli_w(Ra, Rlen, exact_log2(4 * sizeof (jint))); + sub_d(Ra, SP, Ra); + + srli_w(Rlen, Rlen, 1); // length in longwords = len/2 + + { + // Copy input args, reversing as we go. We use Ra as a + // temporary variable. + reverse(Ra, Pa_base, Rlen, t0, t1); + if (!_squaring) + reverse(Ra, Pb_base, Rlen, t0, t1); + reverse(Ra, Pn_base, Rlen, t0, t1); + } + + // Push all call-saved registers and also Pm_base which we'll need + // at the end. 
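+    // After enter() and save_regs() the six-word frame looks like this:
+    //
+    //   FP + 5*wordSize : Rhi_ab   (skipped when squaring)
+    //   FP + 4*wordSize : Rlo_ab
+    //   FP + 3*wordSize : Rhi_mn
+    //   FP + 2*wordSize : Rlo_mn
+    //   FP + 1*wordSize : Pm_base  (the caller's result array; restored before
+    //                               the final reverse() back into it)
+    //   FP + 0          : saved FP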
+ save_regs(); + +#ifndef PRODUCT + // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + { + ld_d(Rn, Pn_base, 0); + li(t0, -1); + mul_d(Rlo_mn, Rn, inv); + Label ok; + beq(Rlo_mn, t0, ok); { + stop("broken inverse in Montgomery multiply"); + } bind(ok); + } +#endif + + move(Pm_base, Ra); + + move(t0, R0); + move(t1, R0); + move(t2, R0); + + block_comment("for (int i = 0; i < len; i++) {"); + move(Ri, R0); { + Label loop, end; + bge(Ri, Rlen, end); + + bind(loop); + pre1(Ri); + + block_comment(" for (j = i; j; j--) {"); { + move(Rj, Ri); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); + } block_comment(" } // j"); + + post1(); + addi_w(Ri, Ri, 1); + blt(Ri, Rlen, loop); + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + move(Ri, Rlen); + slli_w(Rlen2, Rlen, 1); { + Label loop, end; + bge(Ri, Rlen2, end); + + bind(loop); + pre2(Ri, Rlen); + + block_comment(" for (j = len*2-i-1; j; j--) {"); { + sub_w(Rj, Rlen2, Ri); + addi_w(Rj, Rj, -1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step, Rlo_ab); + } block_comment(" } // j"); + + post2(Ri, Rlen); + addi_w(Ri, Ri, 1); + blt(Ri, Rlen2, loop); + bind(end); + } + block_comment("} // i"); + + normalize(Rlen); + + move(Ra, Pm_base); // Save Pm_base in Ra + restore_regs(); // Restore caller's Pm_base + + // Copy our result into caller's Pm_base + reverse(Pm_base, Ra, Rlen, t0, t1); + + leave(); + bind(nothing); + jr(RA); + + return entry; + } + // In C, approximately: + + // void + // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], + // unsigned long Pn_base[], unsigned long Pm_base[], + // unsigned long inv, int len) { + // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + // unsigned long Ra, Rb, Rn, Rm; + // int i, Iam, Ibn; + + // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); + + // for (i = 0; i < len; i++) { + // int j; + + // Iam = 0; + // Ibn = i; + + // Ra = Pa_base[Iam]; + // Rb = Pb_base[Iam]; + // Rm = Pm_base[Ibn]; + // Rn = Pn_base[Ibn]; + + // int iters = i; + // for (j = 0; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = Pa_base[++Iam]; + // Rb = pb_base[--Ibn]; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + // } + + // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Pm_base[Iam] = Rm = t0 * inv; + // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + + // assert(t0 == 0, "broken Montgomery multiply"); + + // t0 = t1; t1 = t2; t2 = 0; + // } + + // for (i = len; i < 2*len; i++) { + // int j; + + // Iam = i - len; + // Ibn = len; + + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + + // int iters = len*2-i-1; + // for (j = i-len+1; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = Pa_base[++Iam]; + // Rb = Pb_base[--Ibn]; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = Pm_base[++Iam]; + // Rn = Pn_base[--Ibn]; + // } + + // Pm_base[i-len] = t0; + // t0 = t1; t1 = t2; t2 = 0; + // } + + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + // } + }; + + // Initialization + void generate_initial() { + // Generates all stubs and initializes 
the entry points + + //------------------------------------------------------------- + //----------------------------------------------------------- + // entry points that exist in all platforms + // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller + // than the disadvantage of having a much more complicated generator structure. + // See also comment in stubRoutines.hpp. + StubRoutines::_forward_exception_entry = generate_forward_exception(); + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); + // is referenced by megamorphic call + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); + + StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); + } + + void generate_all() { + // Generates all stubs and initializes the entry points + + // These entry points require SharedInfo::stack0 to be set up in + // non-core builds and need to be relocatable, so they each + // fabricate a RuntimeStub internally. + StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); + + StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); + + StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); + + // entry points that are platform specific + + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); +#ifndef CORE + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); +#endif + + // Safefetch stubs. + generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, + &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, false /* squaring */); + StubRoutines::_montgomeryMultiply = g.generate_multiply(); + } + + if (UseMontgomerySquareIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); + MontgomeryMultiplyGenerator g(_masm, true /* squaring */); + // We use generate_multiply() rather than generate_square() + // because it's faster for the sizes of modulus we care about. 
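+      // (The generator was constructed with squaring == true, so
+      // generate_multiply() skips the b-array reversal and the Rhi_ab spill in
+      // save_regs()/restore_regs(); the rest of the code path is shared with
+      // the multiply stub.)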
+ StubRoutines::_montgomerySquare = g.generate_multiply(); + } + + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(false); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_aescrypt_encryptBlock(true); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_aescrypt_decryptBlock(true); + } + + if (UseSHA1Intrinsics) { + generate_sha1_implCompress("sha1_implCompress", StubRoutines::_sha1_implCompress, StubRoutines::_sha1_implCompressMB); + } + + if (UseSHA256Intrinsics) { + generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); + } + + if (UseCRC32Intrinsics) { + // set table address before stub generation which use it + StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; + StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); + } + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + if (all) { + generate_all(); + } else { + generate_initial(); + } + } +}; // end class declaration + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} diff --git a/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp new file mode 100644 index 00000000000..f0f3d55a4ea --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.cpp @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" + +// a description of how to extend it, see the stubRoutines.hpp file. 
+ +//find the last fp value +address StubRoutines::la::_call_stub_compiled_return = NULL; + +/** + * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h + */ +juint StubRoutines::la::_crc_table[] = +{ + // Table 0 + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 
0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL, + + // Table 1 + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 
0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL, + + // Table 2 + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 
0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL, + + // Table 3 + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 
0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL, + // Constants for Neon CRC232 implementation + // k3 = 0x78ED02D5 = x^288 mod poly - bit reversed + // k4 = 0xED627DAE = x^256 mod poly - bit reversed + 0x78ED02D5UL, 0xED627DAEUL, // k4:k3 + 0xED78D502UL, 0x62EDAE7DUL, // byte swap + 0x02D578EDUL, 0x7DAEED62UL, // word swap + 0xD502ED78UL, 0xAE7D62EDUL, // byte swap of word swap +}; diff --git a/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp new file mode 100644 index 00000000000..d020a527e49 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/stubRoutines_loongarch_64.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP +#define CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. 
+ +static bool returns_to_call_stub(address return_pc){ + return return_pc == _call_stub_return_address||return_pc == la::get_call_stub_compiled_return(); +} + +enum platform_dependent_constants { + code_size1 = 20000, // simply increase if too small (assembler will crash if too small) + code_size2 = 60000 // simply increase if too small (assembler will crash if too small) +}; + +class la { + friend class StubGenerator; + friend class VMStructs; + private: + // If we call compiled code directly from the call stub we will + // need to adjust the return back to the call stub to a specialized + // piece of code that can handle compiled results and cleaning the fpu + // stack. The variable holds that location. + static address _call_stub_compiled_return; + static juint _crc_table[]; + +public: + // Call back points for traps in compiled code + static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } + static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } + +}; + +#endif // CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp new file mode 100644 index 00000000000..213e69b0b21 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/templateInterpreterGenerator_loongarch.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP + + protected: + + void generate_fixed_frame(bool native_call); + + // address generate_asm_interpreter_entry(bool synchronized); + +#endif // CPU_LOONGARCH_VM_TEMPLATEINTERPRETERGENERATOR_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp new file mode 100644 index 00000000000..39e3ad7bb57 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP + + + protected: + + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreter to get the VM to print out the size. + // Max size with JVMTI + // The sethi() instruction generates lots more instructions when shell + // stack limit is unlimited, so that's why this is much bigger. + const static int InterpreterCodeSize = 500 * K; + +#endif // CPU_LOONGARCH_VM_TEMPLATEINTERPRETER_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp new file mode 100644 index 00000000000..b25086a3997 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/templateInterpreter_loongarch_64.cpp @@ -0,0 +1,2335 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#ifndef CC_INTERP + +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + + // fixed size of an interpreter frame: + int overhead = frame::sender_sp_offset - + frame::interpreter_frame_initial_sp_offset; + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transistion) Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals. + int size = overhead + + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + extra_args; + + return size; +} + + +const int Interpreter::return_sentinel = 0xfeedbeed; +const int method_offset = frame::interpreter_frame_method_offset * wordSize; +const int bci_offset = frame::interpreter_frame_bcx_offset * wordSize; +const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + +#ifdef ASSERT + { + Label L; + __ addi_d(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ sub_d(T1, T1, SP); // T1 = maximal sp for current fp + __ bge(T1, R0, L); // check if frame is complete + __ stop("interpreter frame not set up"); + __ bind(L); + } +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted + // FIXME: please change the func restore_bcp + // S0 is the conventional register for bcp + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception + // FIXME: why do not pass parameter thread ? 
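+  // (call_VM loads the current thread into the expected argument register
+  // itself, so no explicit thread parameter is needed here.)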
+ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( + const char* name) { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + __ li(A1, (long)name); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + // setup parameters + __ li(A1, (long)name); + if (pass_oop) { + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); + } else { + __ li(A2, (long)message); + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); + } + // throw exception + __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); + return entry; +} + + +address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { + address entry = __ pc(); + // NULL last_sp until next java call + __ st_d(R0,Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + __ dispatch_next(state); + return entry; +} + + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + address entry = __ pc(); + + // Restore stack bottom in case i2c adjusted stack + __ ld_d(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + // and NULL it as marker that sp is now tos until next java call + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + __ restore_bcp(); + __ restore_locals(); + + // mdp: T8 + // ret: FSR + // tmp: T4 + if (state == atos) { + Register mdp = T8; + Register tmp = T4; + __ profile_return_type(mdp, FSR, tmp); + } + + + const Register cache = T4; + const Register index = T3; + __ get_cache_and_index_at_bcp(cache, index, 1, index_size); + + const Register flags = cache; + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_w(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); + __ alsl_d(SP, flags, SP, Interpreter::stackElementScale() - 1); + + __ dispatch_next(state, step); + + return entry; +} + + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, + int step) { + address entry = __ pc(); + // NULL last_sp until next java call + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ restore_bcp(); + __ restore_locals(); + // handle exceptions + { + Label L; + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld_d(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ 
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + __ dispatch_next(state, step); + return entry; +} + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : // fall through + case T_LONG : // fall through + case T_VOID : i = 4; break; + case T_FLOAT : i = 5; break; + case T_DOUBLE : i = 6; break; + case T_OBJECT : // fall through + case T_ARRAY : i = 7; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + + +address TemplateInterpreterGenerator::generate_result_handler_for( + BasicType type) { + address entry = __ pc(); + switch (type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ bstrpick_d(V0, V0, 15, 0); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : /* nothing to do */ break; + case T_FLOAT : /* nothing to do */ break; + case T_DOUBLE : /* nothing to do */ break; + case T_OBJECT : + { + __ ld_d(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ verify_oop(V0); // and verify it + } + break; + default : ShouldNotReachHere(); + } + __ jr(RA); // return from result handler + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for( + TosState state, + address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} + + + +// Helpers for commoning out cases in the various type of method entries. +// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// Rmethod: method +// T3 : invocation counter +// +void InterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { + Label done; + if (TieredCompilation) { + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + Label no_mdo; + if (ProfileInterpreter) { + // Are we profiling? 
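+      // (Method::_method_data is allocated lazily, so a null MDO just means
+      // this method has not been profiled yet; in that case we branch to the
+      // MethodCounters-based counter at no_mdo below.)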
+ __ ld_d(FSR, Address(Rmethod, Method::method_data_offset())); + __ beq(FSR, R0, no_mdo); + // Increment counter in the MDO + const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); + __ beq(R0, R0, done); + } + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(FSR, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + __ get_method_counters(Rmethod, FSR, done); + __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); + __ bind(done); + } else { + const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + + __ get_method_counters(Rmethod, FSR, done); + + if (ProfileInterpreter) { // %%% Merge this into methodDataOop + __ ld_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + __ addi_d(T4, T4, 1); + __ st_w(T4, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + } + // Update standard invocation counters + __ ld_w(T3, invocation_counter); + __ increment(T3, InvocationCounter::count_increment); + __ st_w(T3, invocation_counter); // save invocation count + + __ ld_w(FSR, backedge_counter); // load backedge counter + __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits + __ andr(FSR, FSR, AT); + + __ add_d(T3, T3, FSR); // add both counters + + if (ProfileInterpreter && profile_method != NULL) { + // Test to see if we should create a method data oop + if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { + __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); + __ bne_far(AT, R0, *profile_method_continue); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ ld_w(AT, AT, 0); + __ blt_far(T3, AT, *profile_method_continue, true /* signed */); + } + + // if no method data exists, go to profile_method + __ test_method_data_pointer(FSR, *profile_method); + } + + if (Assembler::is_simm(CompileThreshold, 12)) { + __ srli_w(AT, T3, InvocationCounter::count_shift); + __ slti(AT, AT, CompileThreshold); + __ beq_far(AT, R0, *overflow); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); + __ ld_w(AT, AT, 0); + __ bge_far(T3, AT, *overflow, true /* signed */); + } + + __ bind(done); + } +} + +void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { + + // Asm interpreter on entry + // S7 - locals + // S0 - bcp + // Rmethod - method + // FP - interpreter frame + + // On return (i.e. jump to entry_point) + // Rmethod - method + // RA - return address of interpreter caller + // tos - the last parameter to Java method + // SP - sender_sp + + // the bcp is valid if and only if it's not null + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), R0); + __ ld_d(Rmethod, FP, method_offset); + // Preserve invariant that S0/S7 contain bcp/locals of sender frame + __ b_far(*do_continue); +} + +// See if we've got enough room on the stack for locals plus overhead. 
+// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. +// +// NOTE: Since the additional locals are also always pushed (wasn't +// obvious in generate_method_entry) so the guard should work for them +// too. +// +// Args: +// T2: number of additional locals this frame needs (what we must check) +// T0: Method* +// +void InterpreterGenerator::generate_stack_overflow_check(void) { + // see if we've got enough room on the stack for locals plus overhead. + // the expression stack grows down incrementally, so the normal guard + // page mechanism will work for that. + // + // Registers live on entry: + // + // T0: Method* + // T2: number of additional locals this frame needs (what we must check) + + // NOTE: since the additional locals are also always pushed (wasn't obvious in + // generate_method_entry) so the guard should work for them too. + // + + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved fp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) + + entry_size; + + const int page_size = os::vm_page_size(); + Label after_frame_check; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. + __ li(AT, (page_size - overhead_size) / Interpreter::stackElementSize); + __ bge(AT, T2, after_frame_check); + + // compute sp as if this were going to be the last frame on + // the stack before the red zone +#ifndef OPT_THREAD + Register thread = T1; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + + // locals + overhead, in bytes + __ slli_d(T3, T2, Interpreter::stackElementScale()); + __ addi_d(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 + +#ifdef ASSERT + Label stack_base_okay, stack_size_okay; + // verify that thread stack base is non-zero + __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); + __ bne(AT, R0, stack_base_okay); + __ stop("stack base is zero"); + __ bind(stack_base_okay); + // verify that thread stack size is non-zero + __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); + __ bne(AT, R0, stack_size_okay); + __ stop("stack size is zero"); + __ bind(stack_size_okay); +#endif + + // Add stack base to locals and subtract stack size + __ ld_d(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT + __ add_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 + __ ld_d(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT + __ sub_d(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 + + + // add in the redzone and yellow size + __ li(AT, (StackRedPages+StackYellowPages) * page_size); + __ add_d(T3, T3, AT); + + // check against the current stack bottom + __ blt(T3, SP, after_frame_check); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. 
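+  // In effect the code above (for frames larger than a page; small frames
+  // took the early branch) computed
+  //
+  //   limit = stack_base - stack_size
+  //           + (StackRedPages + StackYellowPages) * page_size
+  //           + additional_locals_in_bytes + overhead_size
+  //
+  // and we only reach the throw below when SP <= limit, i.e. when the new
+  // frame would reach down into the guard pages.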
+ __ move(SP, Rsender); + assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); + + // all done with frame size check + __ bind(after_frame_check); +} + +// Allocate monitor and lock method (asm interpreter) +// Rmethod - Method* +void InterpreterGenerator::lock_method(void) { + // synchronize method + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + +#ifdef ASSERT + { Label L; + __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); + __ bne(T0, R0, L); + __ stop("method doesn't need synchronization"); + __ bind(L); + } +#endif // ASSERT + // get synchronization object + { + Label done; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, T0, JVM_ACC_STATIC); + __ ld_d(T0, LVP, Interpreter::local_offset_in_bytes(0)); + __ beq(T2, R0, done); + __ ld_d(T0, Rmethod, in_bytes(Method::const_offset())); + __ ld_d(T0, T0, in_bytes(ConstMethod::constants_offset())); + __ ld_d(T0, T0, ConstantPool::pool_holder_offset_in_bytes()); + __ ld_d(T0, T0, mirror_offset); + __ bind(done); + } + // add space for monitor & lock + __ addi_d(SP, SP, (-1) * entry_size); // add space for a monitor entry + __ st_d(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // set new monitor block top + __ st_d(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object + // FIXME: I do not know what lock_object will do and what it will need + __ move(c_rarg0, SP); // object address + __ lock_object(c_rarg0); +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address InterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod: Method* + // Rsender: senderSP must preserved for slow path + // SP: args + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + __ li(AT, SafepointSynchronize::_not_synchronized); + __ li(T8, (long)SafepointSynchronize::address_of_state()); + __ bne(T8, AT, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. 
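+    // What the fast path below computes, roughly (update_byte_crc32 is the
+    // usual table-driven single-byte step over StubRoutines::la::_crc_table):
+    //
+    //   crc = ~crc;
+    //   crc = crc_table[(crc ^ val) & 0xff] ^ (crc >> 8);   // crc, val as juint
+    //   return ~crc;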
+ + const Register crc = A0; // crc + const Register val = A1; // source java byte value + const Register tbl = A2; // scratch + + // Arguments are reversed on java expression stack + __ ld_w(val, SP, 0); // byte value + __ ld_w(crc, SP, wordSize); // Initial CRC + + __ li(tbl, (long)StubRoutines::crc_table_addr()); + + __ nor(crc, crc, R0); // ~crc + __ update_byte_crc32(crc, val, tbl); + __ nor(crc, crc, R0); // ~crc + + // restore caller SP + __ move(SP, Rsender); + __ jr(RA); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + + (void) generate_native_entry(false); + + return entry; + } + return generate_native_entry(false); +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod: Method* + // Rsender: senderSP must preserved for slow path + // SP: args + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + __ li(AT, SafepointSynchronize::_not_synchronized); + __ li(T8, (long)SafepointSynchronize::address_of_state()); + __ bne(T8, AT, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + const Register crc = A0; // crc + const Register buf = A1; // source java byte array address + const Register len = A2; // length + const Register tmp = A3; + + const Register off = len; // offset (never overlaps with 'len') + + // Arguments are reversed on java expression stack + // Calculate address of start element + __ ld_w(off, SP, wordSize); // int offset + __ ld_d(buf, SP, 2 * wordSize); // byte[] buf | long buf + __ add_d(buf, buf, off); // + offset + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ ld_w(crc, SP, 4 * wordSize); // long crc + } else { + __ addi_d(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ ld_w(crc, SP, 3 * wordSize); // long crc + } + + // Can now load 'len' since we're finished with 'off' + __ ld_w(len, SP, 0); // length + + __ kernel_crc32(crc, buf, len, tmp); + + // restore caller SP + __ move(SP, Rsender); + __ jr(RA); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + + (void) generate_native_entry(false); + + return entry; + } + return generate_native_entry(false); +} + +// Generate a fixed interpreter frame. This is identical setup for +// interpreted methods and for native methods hence the shared code. +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- T0(sender's sp) + // ... 
+ // [ argument word 0 ] <--- S7 + + // initialize fixed part of activation frame + // sender's sp in Rsender + int i = 0; + int frame_size = 9; +#ifndef CORE + ++frame_size; +#endif + __ addi_d(SP, SP, (-frame_size) * wordSize); + __ st_d(RA, SP, (frame_size - 1) * wordSize); // save return address + __ st_d(FP, SP, (frame_size - 2) * wordSize); // save sender's fp + __ addi_d(FP, SP, (frame_size - 2) * wordSize); + __ st_d(Rsender, FP, (-++i) * wordSize); // save sender's sp + __ st_d(R0, FP,(-++i) * wordSize); //save last_sp as null + __ st_d(LVP, FP, (-++i) * wordSize); // save locals offset + __ ld_d(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop + __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase + __ st_d(Rmethod, FP, (-++i) * wordSize); // save Method* +#ifndef CORE + if (ProfileInterpreter) { + Label method_data_continue; + __ ld_d(AT, Rmethod, in_bytes(Method::method_data_offset())); + __ beq(AT, R0, method_data_continue); + __ addi_d(AT, AT, in_bytes(MethodData::data_offset())); + __ bind(method_data_continue); + __ st_d(AT, FP, (-++i) * wordSize); + } else { + __ st_d(R0, FP, (-++i) * wordSize); + } +#endif // !CORE + + __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); + __ st_d(T2, FP, (-++i) * wordSize); // set constant pool cache + if (native_call) { + __ st_d(R0, FP, (-++i) * wordSize); // no bcp + } else { + __ st_d(BCP, FP, (-++i) * wordSize); // set bcp + } + __ st_d(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom + assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); +} + +// End of helpers + +// Various method entries +//------------------------------------------------------------------------------------------------------------------------ +// +// + +// Call an accessor method (assuming it is resolved, otherwise drop +// into vanilla (slow path) entry +address InterpreterGenerator::generate_accessor_entry(void) { + // Rmethod: Method* + // V0: receiver (preserve for slow entry into asm interpreter) + // Rsender: senderSP must preserved for slow path, set SP to it on fast path + + address entry_point = __ pc(); + Label xreturn_path; + // do fastpath for resolved accessor methods + if (UseFastAccessorMethods) { + Label slow_path; + __ li(T2, SafepointSynchronize::address_of_state()); + __ ld_w(AT, T2, 0); + __ addi_d(AT, AT, -(SafepointSynchronize::_not_synchronized)); + __ bne(AT, R0, slow_path); + // Code: _aload_0, _(i|a)getfield, _(i|a)return or any rewrites thereof; + // parameter size = 1 + // Note: We can only use this code if the getfield has been resolved + // and if we don't have a null-pointer exception => check for + // these conditions first and use slow path if necessary. + // Rmethod: method + // V0: receiver + + // [ receiver ] <-- sp + __ ld_d(T0, SP, 0); + + // check if local 0 != NULL and read field + __ beq(T0, R0, slow_path); + __ ld_d(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_d(T2, T2, in_bytes(ConstMethod::constants_offset())); + // read first instruction word and extract bytecode @ 1 and index @ 2 + __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); + __ ld_w(T3, T3, in_bytes(ConstMethod::codes_offset())); + // Shift codes right to get the index on the right. 
+ // The bytecode fetched looks like <0xb4><0x2a> + __ srli_d(T3, T3, 2 * BitsPerByte); + // FIXME: maybe it's wrong + __ slli_d(T3, T3, exact_log2(in_words(ConstantPoolCacheEntry::size()))); + __ ld_d(T2, T2, ConstantPool::cache_offset_in_bytes()); + + // T0: local 0 + // Rmethod: method + // V0: receiver - do not destroy since it is needed for slow path! + // T1: scratch use which register instead ? + // T3: constant pool cache index + // T2: constant pool cache + // Rsender: send's sp + // check if getfield has been resolved and read constant pool cache entry + // check the validity of the cache entry by testing whether _indices field + // contains Bytecode::_getfield in b1 byte. + assert(in_words(ConstantPoolCacheEntry::size()) == 4, "adjust shift below"); + + __ slli_d(T8, T3, Address::times_8); + __ li(T1, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::indices_offset())); + __ add_d(T1, T8, T1); + __ ldx_w(T1, T1, T2); + __ srli_d(T1, T1, 2 * BitsPerByte); + __ andi(T1, T1, 0xFF); + __ addi_d(T1, T1, (-1) * Bytecodes::_getfield); + __ bne(T1, R0, slow_path); + + // Note: constant pool entry is not valid before bytecode is resolved + + __ li(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + __ add_d(T1, T1, T8); + __ ldx_w(AT, T1, T2); + + __ li(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ add_d(T1, T1, T8); + __ ldx_w(T3, T1, T2); + + Label notByte, notBool, notShort, notChar, notObj; + + // Need to differentiate between igetfield, agetfield, bgetfield etc. + // because they are different sizes. + // Use the type from the constant pool cache + __ srli_w(T3, T3, ConstantPoolCacheEntry::tos_state_shift); + // Make sure we don't need to mask T3 for tosBits after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + // btos = 0 + __ add_d(T0, T0, AT); + __ bne(T3, R0, notByte); + + __ ld_b(V0, T0, 0); + __ b(xreturn_path); + + //ztos + __ bind(notByte); + __ addi_d(T1, T3, (-1) * ztos); + __ bne(T1, R0, notBool); + __ ld_b(V0, T0, 0); + __ b(xreturn_path); + + //stos + __ bind(notBool); + __ addi_d(T1, T3, (-1) * stos); + __ bne(T1, R0, notShort); + __ ld_h(V0, T0, 0); + __ b(xreturn_path); + + //ctos + __ bind(notShort); + __ addi_d(T1, T3, (-1) * ctos); + __ bne(T1, R0, notChar); + __ ld_hu(V0, T0, 0); + __ b(xreturn_path); + + //atos + __ bind(notChar); + __ addi_d(T1, T3, (-1) * atos); + __ bne(T1, R0, notObj); + //add for compressedoops + __ load_heap_oop(V0, Address(T0, 0)); + __ b(xreturn_path); + + //itos + __ bind(notObj); +#ifdef ASSERT + Label okay; + __ addi_d(T1, T3, (-1) * itos); + __ beq(T1, R0, okay); + __ stop("what type is this?"); + __ bind(okay); +#endif // ASSERT + __ ld_w(V0, T0, 0); + + __ bind(xreturn_path); + + // _ireturn/_areturn + //FIXME + __ move(SP, Rsender);//FIXME, set sender's fp to SP + __ jr(RA); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + (void) generate_normal_entry(false); + } else { + (void) generate_normal_entry(false); + } + return entry_point; +} + +// Method entry for java.lang.ref.Reference.get. +address InterpreterGenerator::generate_Reference_get_entry(void) { +#if INCLUDE_ALL_GCS + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. 
+ // + // Notes:- + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. + // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. + // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_enty. + // + // Rmethod: Method* + + // Rsender: senderSP must preserve for slow path, set SP to it on fast path (Rsender) + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + if (UseG1GC) { + Label slow_path; + + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ld_d(V0, SP, 0); + + __ beq(V0, R0, slow_path); + + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + + // Load the value of the referent field. + const Address field_address(V0, referent_offset); + __ load_heap_oop(V0, field_address); + + __ push(RA); + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + __ g1_write_barrier_pre(noreg /* obj */, + V0 /* pre_val */, + TREG /* thread */, + Rmethod /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ pop(RA); + + __ add_d(SP, Rsender, R0); // set sp to sender sp + __ jr(RA); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + (void) generate_normal_entry(false); + + return entry; + } +#endif // INCLUDE_ALL_GCS + + // If G1 is not enabled then attempt to go through the accessor entry point + // Reference.get is an accessor + return generate_accessor_entry(); +} + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. +address InterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls; + // Rsender: sender's sp + // Rmethod: Method* + address entry_point = __ pc(); + +#ifndef CORE + const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset())); +#endif + // get parameter size (always needed) + // the size in the java stack + __ ld_d(V0, Rmethod, in_bytes(Method::const_offset())); + __ ld_hu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); + + // native calls don't need the stack size check since they have no expression stack + // and the arguments are already on the stack and we only add a handful of words + // to the stack + + // Rmethod: Method* + // V0: size of parameters + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... 
+ // [ argument word 0 ] + + // for natives the size of locals is zero + + // compute beginning of parameters (S7) + __ slli_d(LVP, V0, Address::times_8); + __ addi_d(LVP, LVP, (-1) * wordSize); + __ add_d(LVP, LVP, SP); + + + // add 2 zero-initialized slots for native calls + // 1 slot for native oop temp offset (setup via runtime) + // 1 slot for static native result handler3 (setup via runtime) + __ push2(R0, R0); + + // Layout of frame at this point + // [ method holder mirror ] <--- sp + // [ result type info ] + // [ argument word n-1 ] <--- T0 + // ... + // [ argument word 0 ] <--- LVP + + +#ifndef CORE + if (inc_counter) __ ld_w(T3, invocation_counter); // (pre-)fetch invocation count +#endif + + // initialize fixed part of activation frame + generate_fixed_frame(true); + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- sender's sp + // ... + // [ argument word 0 ] <--- S7 + + + // make sure method is native & not abstract +#ifdef ASSERT + __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(AT, T0, JVM_ACC_NATIVE); + __ bne(AT, R0, L); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(AT, T0, JVM_ACC_ABSTRACT); + __ beq(AT, R0, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. + Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ li(AT, (int)true); + __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; + __ bind(continue_after_compile); +#endif // CORE + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ ld_w(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); + __ beq(AT, R0, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // after method_lock, the layout of frame is as following + // + // [ monitor entry ] <--- sp + // ... 
+ // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // start execution +#ifdef ASSERT + { + Label L; + __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ stop("broken stack frame setup in interpreter in asm"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + // work registers + const Register method = Rmethod; + //const Register thread = T2; + const Register t = T8; + + __ get_method(method); + __ verify_oop(method); + { + Label L, Lstatic; + __ ld_d(t,method,in_bytes(Method::const_offset())); + __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); + // LoongArch ABI: caller does not reserve space for the register auguments. + // A0 and A1(if needed) + __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, AT, JVM_ACC_STATIC); + __ beq(AT, R0, Lstatic); + __ addi_d(t, t, 1); + __ bind(Lstatic); + __ addi_d(t, t, -7); + __ bge(R0, t, L); + __ slli_d(t, t, Address::times_8); + __ sub_d(SP, SP, t); + __ bind(L); + } + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + __ move(AT, SP); + // [ ] <--- sp + // ... (size of parameters - 8 ) + // [ monitor entry ] + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + // get signature handler + { + Label L; + __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); + __ bne(T4, R0, L); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ ld_d(T4, method, in_bytes(Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + // FIXME: when change codes in InterpreterRuntime, note this point + // from: begin of parameters + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); + // to: current sp + assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); + // temp: T3 + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); + + __ jalr(T4); + __ get_method(method); + + // + // if native function is static, and its second parameter has type length of double word, + // and first parameter has type length of word, we have to reserve one word + // for the first parameter, according to LoongArch abi. + // if native function is not static, and its third parameter has type length of double word, + // and second parameter has type length of word, we have to reserve one word for the second + // parameter. 
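The ABI note above is the only LoongArch-specific wrinkle in this argument shuffle: the first eight integer arguments travel in A0..A7 and only the overflow needs caller stack space. A minimal stand-alone sketch of that bookkeeping (function and parameter names are illustrative, not HotSpot API):

// Sketch only, assuming the standard LoongArch64 integer calling convention:
// A0..A7 carry the first eight arguments, anything beyond that spills to the
// stack, and the caller reserves no shadow space for register arguments.
static int native_outgoing_stack_words(int java_param_words, bool is_static) {
  const int arg_regs = 8;                    // A0..A7
  int total = 1                              // JNIEnv*, always passed in A0
            + (is_static ? 1 : 0)            // mirror handle for static methods
            + java_param_words;              // receiver (if any) + Java arguments
  int spilled = total - arg_regs;
  return spilled > 0 ? spilled : 0;          // words the wrapper allocates below SP
}

The generated code above arrives at the same count by loading size_of_parameters, adding one in the static case, and subtracting seven (JNIEnv* consumes one of the eight registers) before rounding SP down to StackAlignmentInBytes.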
+ // + + + // result handler is in V0 + // set result handler + __ st_d(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); + +#define FIRSTPARA_SHIFT_COUNT 5 +#define SECONDPARA_SHIFT_COUNT 9 +#define THIRDPARA_SHIFT_COUNT 13 +#define PARA_MASK 0xf + + // pass mirror handle if static call + { + Label L; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld_w(t, method, in_bytes(Method::access_flags_offset())); + __ andi(AT, t, JVM_ACC_STATIC); + __ beq(AT, R0, L); + + // get mirror + __ ld_d(t, method, in_bytes(Method:: const_offset())); + __ ld_d(t, t, in_bytes(ConstMethod::constants_offset())); //?? + __ ld_d(t, t, ConstantPool::pool_holder_offset_in_bytes()); + __ ld_d(t, t, mirror_offset); + // copy mirror into activation frame + //__ st_w(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + // pass handle to mirror + __ st_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ addi_d(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ move(A1, t); + __ bind(L); + } + + // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) + // [ ] | + // ... size of parameters(or +1) | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // get native function entry point + { Label L; + __ ld_d(T4, method, in_bytes(Method::native_function_offset())); + __ li(T6, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ bne(T6, T4, L); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ verify_oop(method); + __ ld_d(T4, method, in_bytes(Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + // native function in T4 +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ addi_d(t, thread, in_bytes(JavaThread::jni_environment_offset())); + __ move(A0, t); + // [ jni environment ] <--- sp + // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) + // [ ] | + // ... size of parameters | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // Set the last Java PC in the frame anchor to be the return address from + // the call to the native method: this will allow the debugger to + // generate an accurate stack trace. 
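Conceptually, the frame anchor filled in by set_last_Java_frame is just three thread-local words that let an external stack walker bridge the native call. A stand-alone sketch (struct and field names are illustrative, not the real JavaFrameAnchor):

#include <cstdint>

// Sketch only: while the thread runs native code its real SP/FP/PC mean
// nothing to a Java stack walker, so the walk starts from these values.
struct LastJavaFrameSketch {
  intptr_t* last_java_sp;   // interpreter SP at the point of the call
  intptr_t* last_java_fp;   // FP of the interpreter frame being left
  uintptr_t last_java_pc;   // address of the native_return label used below
};

static void record_last_java_frame(LastJavaFrameSketch* anchor,
                                   intptr_t* sp, intptr_t* fp, uintptr_t pc) {
  anchor->last_java_sp = sp;
  anchor->last_java_fp = fp;
  anchor->last_java_pc = pc;   // a debugger now resumes the trace at the call site
}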
+ Label native_return; + __ set_last_Java_frame(thread, SP, FP, native_return); + + // change thread state +#ifdef ASSERT + { + Label L; + __ ld_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ addi_d(t, t, (-1) * _thread_in_Java); + __ beq(t, R0, L); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif + + __ li(t, _thread_in_native); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + + // call native method + __ jalr(T4); + __ bind(native_return); + // result potentially in V0 or F0 + + + // via _last_native_pc and not via _last_jave_sp + // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. + // If the order changes or anything else is added to the stack the code in + // interpreter_frame_result will have to be changed. + //FIXME, should modify here + // save return value to keep the value from being destroyed by other calls + __ push(dtos); + __ push(ltos); + + // change thread state + __ get_thread(thread); + __ li(t, _thread_in_native_trans); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ membar(__ AnyAny); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. + __ serialize_memory(thread, A0); + } + } + + // check for safepoint operation in progress and/or pending suspend requests + { Label Continue; + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are + // preserved and correspond to the bcp/locals pointers. So we do a runtime call + // by hand. + // + Label L; + __ li(AT, SafepointSynchronize::address_of_state()); + __ ld_w(AT, AT, 0); + __ bne(AT, R0, L); + __ ld_w(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ bind(L); + __ move(A0, thread); + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), + relocInfo::runtime_call_type); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + //add for compressedoops + __ reinit_heapbase(); + __ bind(Continue); + } + + // change thread state + __ li(t, _thread_in_Java); + if (os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadStore|__ StoreStore)); // store release + } + __ st_w(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ reset_last_Java_frame(thread, true); + + // reset handle block + __ ld_d(t, thread, in_bytes(JavaThread::active_handles_offset())); + __ st_w(R0, t, JNIHandleBlock::top_offset_in_bytes()); + + // If result was an oop then unbox and save it in the frame + { + Label no_oop; + //FIXME, addi only support 12-bit imeditate + __ ld_d(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); + __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); + __ bne(AT, T0, no_oop); + __ pop(ltos); + // Unbox oop result, e.g. JNIHandles::resolve value. 
+ __ resolve_jobject(V0, thread, T4); + __ st_d(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); + // keep stack depth as expected by pushing oop which will eventually be discarded + __ push(ltos); + __ bind(no_oop); + } + { + Label no_reguard; + __ ld_w(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ li(AT,(int) JavaThread::stack_guard_yellow_disabled); + __ bne(t, AT, no_reguard); + __ pushad(); + __ move(S5_heapbase, SP); + __ li(AT, -StackAlignmentInBytes); + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); + __ move(SP, S5_heapbase); + __ popad(); + //add for compressedoops + __ reinit_heapbase(); + __ bind(no_reguard); + } + // restore BCP to have legal interpreter frame, + // i.e., bci == 0 <=> BCP == code_base() + // Can't call_VM until bcp is within reasonable. + __ get_method(method); // method is junk from thread_in_native to now. + __ verify_oop(method); + __ ld_d(BCP, method, in_bytes(Method::const_offset())); + __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ ld_d(t, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(t, R0, L); + // Note: At some point we may want to unify this with the code used in + // call_VM_base(); + // i.e., we should use the StubRoutines::forward_exception code. For now this + // doesn't work here because the sp is not correctly set at this point. + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // do unlocking if necessary + { + Label L; + __ ld_w(t, method, in_bytes(Method::access_flags_offset())); + __ andi(t, t, JVM_ACC_SYNCHRONIZED); + __ addi_d(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); + __ beq(t, R0, L); + // the code below should be shared with interpreter macro assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, + // since this is a synchronized method. However, need + // to check that the object has not been unlocked by + // an explicit monitorexit bytecode. + // address of first monitor + + __ ld_d(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ bne(t, R0, unlock); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(unlock); + __ unlock_object(c_rarg0); + } + __ bind(L); + } + + // jvmti/jvmpi support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). 
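One detail of the unlock block above is easy to miss: a synchronized native method owns the frame's first monitor slot, but user code may already have released it with an explicit monitorexit. A sketch of that decision (types and names are illustrative, not HotSpot's):

// Sketch only: a NULL obj field means the slot was already unlocked, so the
// interpreter must raise IllegalMonitorStateException instead of unlocking a
// monitor it no longer holds.
struct MonitorSlotSketch { void* obj; /* lock word omitted */ };

enum UnlockAction { UNLOCK_OBJECT, THROW_ILLEGAL_MONITOR_STATE };

static UnlockAction native_exit_unlock_action(const MonitorSlotSketch* slot) {
  return slot->obj != nullptr ? UNLOCK_OBJECT : THROW_ILLEGAL_MONITOR_STATE;
}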
+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + + // restore potential result in V0, + // call result handler to restore potential result in ST0 & handle result + + __ pop(ltos); + __ pop(dtos); + + __ ld_d(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); + __ jalr(t); + + + // remove activation + __ ld_d(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp + __ ld_d(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address + __ ld_d(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp + __ jr(RA); + +#ifndef CORE + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + // entry_point is the beginning of this + // function and checks again for compiled code + } +#endif + return entry_point; +} + +// +// Generic interpreted method entry to (asm) interpreter +// +// Layout of frame just at the entry +// +// [ argument word n-1 ] <--- sp +// ... +// [ argument word 0 ] +// assume Method* in Rmethod before call this method. +// prerequisites to the generated stub : the callee Method* in Rmethod +// note you must save the caller bcp before call the generated stub +// +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls; + + // Rmethod: Method* + // Rsender: sender 's sp + address entry_point = __ pc(); + + const Address invocation_counter(Rmethod, + in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); + + // get parameter size (always needed) + __ ld_d(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod + __ ld_hu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); + + // Rmethod: Method* + // V0: size of parameters + // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i + // get size of locals in words to T2 + __ ld_hu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); + // T2 = no. of additional locals, locals include parameters + __ sub_d(T2, T2, V0); + + // see if we've got enough room on the stack for locals plus overhead. + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + generate_stack_overflow_check(); + // after this function, the layout of frame does not change + + // compute beginning of parameters (LVP) + __ slli_d(LVP, V0, LogBytesPerWord); + __ addi_d(LVP, LVP, (-1) * wordSize); + __ add_d(LVP, LVP, SP); + + // T2 - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ beq(T2, R0, exit); + + __ bind(loop); + __ addi_d(SP, SP, (-1) * wordSize); + __ addi_d(T2, T2, -1); // until everything initialized + __ st_d(R0, SP, 0); // initialize local variables + __ bne(T2, R0, loop); + + __ bind(exit); + } + + // + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argument word n-1 ] <--- T0? + // ... 
+ // [ argument word 0 ] <--- LVP + + // initialize fixed part of activation frame + + generate_fixed_frame(false); + + + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] <--- fp + // [ return address ] + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + + // make sure method is not native & not abstract +#ifdef ASSERT + __ ld_d(AT, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(T2, AT, JVM_ACC_NATIVE); + __ beq(T2, R0, L); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(T2, AT, JVM_ACC_ABSTRACT); + __ beq(T2, R0, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. + +#ifndef OPT_THREAD + Register thread = T8; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + __ li(AT, (int)true); + __ st_b(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + + // mdp : T8 + // tmp1: T4 + // tmp2: T2 + __ profile_parameters_type(T8, T4, T2); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + Label profile_method; + Label profile_method_continue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, + &profile_method, + &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + + Label continue_after_compile; + __ bind(continue_after_compile); + +#endif // CORE + + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_b(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + // + if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { Label L; + __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); + __ beq(T2, R0, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // layout of frame after lock_method + // [ monitor entry ] <--- sp + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... 
+ // [ argument word 0 ] <--- LVP + + + // start execution +#ifdef ASSERT + { + Label L; + __ ld_d(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ stop("broken stack frame setup in interpreter in native"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + __ dispatch_next(vtos); + + // invocation counter overflow + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter + __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ get_method(Rmethod); + __ b(profile_method_continue); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + } + + return entry_point; +} + +// Entry points +// +// Here we generate the various kind of entries into the interpreter. +// The two main entry type are generic bytecode methods and native +// call method. These both come in synchronized and non-synchronized +// versions but the frame layout they create is very similar. The +// other method entry types are really just special purpose entries +// that are really entry and interpretation all in one. These are for +// trivial methods like accessor, empty, or special math methods. +// +// When control flow reaches any of the entry types for the interpreter +// the following holds -> +// +// Arguments: +// +// Rmethod: Method* +// V0: receiver +// +// +// Stack layout immediately at entry +// +// [ parameter n-1 ] <--- sp +// ... +// [ parameter 0 ] +// [ expression stack ] (caller's java expression stack) + +// Assuming that we don't go to one of the trivial specialized entries +// the stack will look like below when we are ready to execute the +// first bytecode (or call the native routine). The register usage +// will be as the template based interpreter expects (see +// interpreter_loongarch_64.hpp). +// +// local variables follow incoming parameters immediately; i.e. +// the return address is moved to the end of the locals). +// +// [ monitor entry ] <--- sp +// ... +// [ monitor entry ] +// [ monitor block top ] ( the top monitor entry ) +// [ byte code pointer ] (if native, bcp = 0) +// [ constant pool cache ] +// [ Method* ] +// [ locals offset ] +// [ sender's sp ] +// [ sender's fp ] +// [ return address ] <--- fp +// [ local var m-1 ] +// ... +// [ local var 0 ] +// [ argumnet word n-1 ] <--- ( sender's sp ) +// ... 
+// [ argument word 0 ] <--- S7 + +address AbstractInterpreterGenerator::generate_method_entry( + AbstractInterpreter::MethodKind kind) { + // determine code generation flags + bool synchronized = false; + address entry_point = NULL; + switch (kind) { + case Interpreter::zerolocals : + break; + case Interpreter::zerolocals_synchronized: + synchronized = true; + break; + case Interpreter::native : + entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); + break; + case Interpreter::native_synchronized : + entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); + break; + case Interpreter::empty : + entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); + break; + case Interpreter::accessor : + entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); + break; + case Interpreter::abstract : + entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); + break; + + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : break; + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_sqrt : + entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; + case Interpreter::java_lang_ref_reference_get: + entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; + case Interpreter::java_util_zip_CRC32_update: + entry_point = ((InterpreterGenerator*)this)->generate_CRC32_update_entry(); break; + case Interpreter::java_util_zip_CRC32_updateBytes: // fall thru + case Interpreter::java_util_zip_CRC32_updateByteBuffer: + entry_point = ((InterpreterGenerator*)this)->generate_CRC32_updateBytes_entry(kind); break; + default: + fatal(err_msg("unexpected method kind: %d", kind)); + break; + } + if (entry_point) return entry_point; + + return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized); +} + +// These should never be compiled since the interpreter will prefer +// the compiled version to the intrinsic version. +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + switch (method_kind(m)) { + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : + return false; + default: + return true; + } +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved fp thru expr stack bottom). 
+ // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; + + const int stub_code = 6; // see generate_call_stub + // return overhead_size + method->max_locals() + method->max_stack() + stub_code; + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return overhead_size + method_stack + stub_code; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + // If interpreter_frame!=NULL, set up the method, locals, and monitors. + // The frame interpreter_frame, if not NULL, is guaranteed to be the + // right size, as determined by a previous call to this method. + // It is also guaranteed to be walkable even though it is in a skeletal state + + // fixed size of an interpreter frame: + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; + +#ifdef ASSERT + if (!EnableInvokeDynamic) { + // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? + // Probably, since deoptimization doesn't work yet. + assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); + } + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp+8 + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); + + //set last sp; + intptr_t* sp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(sp); + // All frames but the initial interpreter frame we fill in have a + // value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. 
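The correction described above (and performed just below) is plain pointer arithmetic over the locals that are not incoming parameters. A stand-alone sketch with illustrative names:

#include <cstdint>

// Sketch only: a skeletal interpreter frame's sender SP must skip the extra
// (non-parameter) locals, because the interpreter allocates those below the
// arguments the caller pushed.
static intptr_t* corrected_sender_sp(intptr_t* caller_sp,
                                     int max_locals,
                                     int size_of_parameters,
                                     int words_per_slot) {  // Interpreter::stackElementWords
  int extra_locals = (max_locals - size_of_parameters) * words_per_slot;
  return caller_sp + extra_locals;   // word-granular pointer arithmetic
}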
+ // + if (extra_locals != 0 && + interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. + __ st_d(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); + + // V0: exception + // V1: return address/pc that threw exception + __ restore_bcp(); // BCP points to call/send + __ restore_locals(); + + //add for compressedoops + __ reinit_heapbase(); + // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + // expression stack is undefined here + // V0: exception + // BCP: exception bcp + __ verify_oop(V0); + + // expression stack must be empty before entering the VM in case of an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ move(A1, V0); + __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); + // V0: exception handler entry point + // V1: preserved exception oop + // S0: bcp for exception handler + __ push(V1); // push exception which is now the only value on the stack + __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) + + // If the exception is not handled in the current frame the frame is removed and + // the exception is rethrown (i.e. exception continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction which caused + // the exception and the expression stack is empty. Thus, for any VM calls + // at this point, GC will find a legal oop map (with empty expression stack). + + // In current activation + // V0: exception + // BCP: exception bcp + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition indicating that we are + // currently handling popframe, so that call_VMs that may happen later do not trigger new + // popframe handling cycles. +#ifndef OPT_THREAD + Register thread = T2; + __ get_thread(T2); +#else + Register thread = TREG; +#endif + __ ld_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + __ ori(T3, T3, JavaThread::popframe_processing_bit); + __ st_w(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#ifndef CORE + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. 
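If the caller does turn out to be deoptimized, the code below hands Deoptimization::popframe_preserve_args the block of outgoing arguments computed from LVP. A stand-alone sketch of that address arithmetic (names are illustrative, slot size assumed to be one machine word):

#include <cstddef>
#include <cstdint>

// Sketch only: locals are addressed downward from LVP on this port, so the
// outgoing arguments occupy the size_of_parameters slots ending at LVP and
// start size_of_parameters * bytes_per_slot - wordSize below it.
struct PreservedArgsSketch {
  void*  start;           // lowest address of the arguments to save
  size_t size_in_bytes;   // size_of_parameters * Interpreter::stackElementSize
};

static PreservedArgsSketch popframe_args_to_preserve(intptr_t* lvp,
                                                     int size_of_parameters,
                                                     int bytes_per_slot) {
  size_t bytes = (size_t)size_of_parameters * (size_t)bytes_per_slot;
  char*  start = (char*)lvp - bytes + sizeof(intptr_t);
  PreservedArgsSketch r = { start, bytes };
  return r;
}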
+ Label caller_not_deoptimized; + __ ld_d(A0, FP, frame::return_addr_offset * wordSize); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); + __ bne(V0, R0, caller_not_deoptimized); + + // Compute size of arguments for saving when returning to deoptimized caller + __ get_method(A1); + __ verify_oop(A1); + __ ld_d(A1, A1, in_bytes(Method::const_offset())); + __ ld_hu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); + __ shl(A1, Interpreter::logStackElementSize); + __ restore_locals(); + __ sub_d(A2, LVP, A1); + __ addi_d(A2, A2, wordSize); + // Save these arguments +#ifndef OPT_THREAD + __ get_thread(A0); +#else + __ move(A0, TREG); +#endif + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); + + __ remove_activation(vtos, T4, false, false, false); + + // Inform deoptimization that it is responsible for restoring these arguments +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ li(AT, JavaThread::popframe_force_deopt_reexecution_bit); + __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + // Continue in deoptimization handler + __ jr(T4); + + __ bind(caller_not_deoptimized); + } +#endif /* !CORE */ + + __ remove_activation(vtos, T3, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Clear the popframe condition flag + // Finish with popframe handling + // A previous I2C followed by a deoptimization might have moved the + // outgoing arguments further up the stack. PopFrame expects the + // mutations to those outgoing arguments to be preserved and other + // constraints basically require this frame to look exactly as + // though it had previously invoked an interpreted activation with + // no space between the top of the expression stack (current + // last_sp) and the top of stack. Rather than force deopt to + // maintain this kind of invariant all the time we call a small + // fixup routine to move the mutated arguments onto the top of our + // expression stack if necessary. + __ move(T8, SP); + __ ld_d(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // PC must point into interpreter here + Label L; + __ bind(L); + __ set_last_Java_frame(thread, noreg, FP, L); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); + __ reset_last_Java_frame(thread, true); + // Restore the last_sp and null it out + __ ld_d(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + + + __ li(AT, JavaThread::popframe_inactive); + __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + + // Finish with popframe handling + __ restore_bcp(); + __ restore_locals(); +#ifndef CORE + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. 
+ if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } +#endif // !CORE + // Clear the popframe condition flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ li(AT, JavaThread::popframe_inactive); + __ st_w(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#if INCLUDE_JVMTI + { + Label L_done; + + __ ld_bu(AT, BCP, 0); + __ addi_d(AT, AT, -1 * Bytecodes::_invokestatic); + __ bne(AT, R0, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ get_method(T4); + __ ld_d(T8, LVP, 0); + __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T4, BCP); + + __ beq(T8, R0, L_done); + + __ st_d(T8, SP, 0); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence + __ pop(T0); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ st_d(T0, thread, in_bytes(JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, T3, false, true, false); + // restore exception + __ get_vm_result(T0, thread); + __ verify_oop(T0); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects + // the following registers set up: + // + // T0: exception + // T1: return address/pc that threw exception + // SP: expression stack of caller + // FP: fp of caller + __ push2(T0, T3); // save exception and return address + __ move(A1, T3); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T4, V0); // save exception handler + __ pop2(V0, V1); // restore return address and exception + + // Note that an "issuing PC" is actually the next PC after the call + __ jr(T4); // jump to exception handler of caller +} + + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ load_earlyret_value(state); + +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ ld_ptr(T4, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address cond_addr(T4, in_bytes(JvmtiThreadState::earlyret_state_offset())); + // Clear the earlyret state + __ li(AT, JvmtiThreadState::earlyret_inactive); + __ st_w(AT, cond_addr); + __ membar(__ AnyAny);//no membar here for aarch64 + + + __ remove_activation(state, T0, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ membar(__ AnyAny); + __ jr(T0); + + return entry; +} // end of ForceEarlyReturn support + + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + fep = __ pc(); __ push(ftos); __ b(L); + dep = __ pc(); __ push(dtos); __ b(L); + lep = __ pc(); __ 
push(ltos); __ b(L); + aep =__ pc(); __ push(atos); __ b(L); + bep = cep = sep = + iep = __ pc(); __ push(itos); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + + +//----------------------------------------------------------------------------- +// Generation of individual instructions + +// helpers for generate_and_dispatch + + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : TemplateInterpreterGenerator(code) { + generate_all(); // down here so it can be "virtual" +} + +//----------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + // prepare expression stack + __ push(state); // save tosca + + // tos & tos2 + // trace_bytecode need actually 4 args, the last two is tos&tos2 + // this work fine for x86. but LA ABI calling convention will store A2-A3 + // to the stack position it think is the tos&tos2 + // when the expression stack have no more than 2 data, error occur. + __ ld_d(A2, SP, 0); + __ ld_d(A3, SP, 1 * wordSize); + + // pass arguments & call tracer + __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), RA, A2, A3); + __ move(RA, V0); // make sure return address is not destroyed by pop(state) + + // restore expression stack + __ pop(state); // restore tosca + + // return + __ jr(RA); + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() { + __ li(T8, (long)&BytecodeCounter::_counter_value); + __ ld_w(AT, T8, 0); + __ addi_d(AT, AT, 1); + __ st_w(AT, T8, 0); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { + __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); + __ ld_w(AT, T8, 0); + __ addi_d(AT, AT, 1); + __ st_w(AT, T8, 0); +} + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { + __ li(T8, (long)&BytecodePairHistogram::_index); + __ ld_w(T4, T8, 0); + __ srli_d(T4, T4, BytecodePairHistogram::log2_number_of_codes); + __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); + __ orr(T4, T4, T8); + __ li(T8, (long)&BytecodePairHistogram::_index); + __ st_w(T4, T8, 0); + __ slli_d(T4, T4, 2); + __ li(T8, (long)BytecodePairHistogram::_counters); + __ add_d(T8, T8, T4); + __ ld_w(AT, T8, 0); + __ addi_d(AT, AT, 1); + __ st_w(AT, T8, 0); +} + + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. + address entry = Interpreter::trace_code(t->tos_in()); + assert(entry != NULL, "entry must have been generated"); + __ call(entry, relocInfo::none); + //add for compressedoops + __ reinit_heapbase(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + __ li(T8, long(&BytecodeCounter::_counter_value)); + __ ld_w(T8, T8, 0); + __ li(AT, StopInterpreterAt); + __ bne(T8, AT, L); + __ brk(5); + __ bind(L); +} +#endif // !PRODUCT +#endif // ! CC_INTERP diff --git a/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp new file mode 100644 index 00000000000..228217f0017 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.cpp @@ -0,0 +1,4024 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.inline.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "utilities/macros.hpp" + + +#ifndef CC_INTERP + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { + // No LoongArch specific initialization +} + +// Address computation: local variables + +static inline Address iaddress(int n) { + return Address(LVP, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} +static inline Address haddress(int n) { return iaddress(n + 0); } + + +static inline Address at_sp() { return Address(SP, 0); } +static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } +static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } + +// At top of Java expression stack which may be different than sp(). +// It isn't for category 1 objects. 
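Before the expression-stack helpers that follow, it is worth spelling out the arithmetic behind the local-variable helpers above: on this port locals are laid out downward from LVP, one word per slot, which is why laddress(n) is defined as iaddress(n + 1). A stand-alone sketch (assumed 8-byte slots, illustrative names):

#include <cstdint>

// Sketch only: slot n lives n words below LVP. A two-slot value (long or
// double) occupying slots n and n+1 is addressed through its higher-numbered,
// lower-address slot.
static intptr_t* local_slot(intptr_t* lvp, int n)     { return lvp - n; }
static intptr_t* two_slot_value(intptr_t* lvp, int n) { return local_slot(lvp, n + 1); }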
+static inline Address at_tos () { + Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); + return tos; +} + +static inline Address at_tos_p1() { + return Address(SP, Interpreter::expr_offset_in_bytes(1)); +} + +static inline Address at_tos_p2() { + return Address(SP, Interpreter::expr_offset_in_bytes(2)); +} + +static inline Address at_tos_p3() { + return Address(SP, Interpreter::expr_offset_in_bytes(3)); +} + +// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(BCP, offset); +} + +// Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == FSR, "parameter is just for looks"); + switch (barrier) { +#if INCLUDE_ALL_GCS + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != T3) { + __ move(T3, obj.base()); + } + } else { + __ lea(T3, obj); + } + __ g1_write_barrier_pre(T3 /* obj */, + T1 /* pre_val */, + TREG /* thread */, + T4 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + if (val == noreg) { + __ store_heap_oop_null(Address(T3, 0)); + } else { + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; + if (UseCompressedOops) { + new_val = T1; + __ move(new_val, val); + } + __ store_heap_oop(Address(T3, 0), val); + __ g1_write_barrier_post(T3 /* store_adr */, + new_val /* new_val */, + TREG /* thread */, + T4 /* tmp */, + T1 /* tmp2 */); + } + } + break; +#endif // INCLUDE_ALL_GCS + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ store_heap_oop_null(obj); + } else { + __ store_heap_oop(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + //TODO: LA + __ lea(T4, obj); + __ store_check(T4); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ store_heap_oop_null(obj); + } else { + __ store_heap_oop(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} + +// bytecode folding +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) { + if (!RewriteBytecodes) return; + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. 
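A stand-alone sketch of the decision that comment describes (illustrative names rather than HotSpot types):

// Sketch only: the generic bytecode at bcp[0] is replaced by its fast variant
// only once the constant-pool-cache entry carries a non-zero put_code, i.e.
// only after InterpreterRuntime::resolve_get_put has run; until then the slow
// bytecode stays in place so resolution keeps being triggered.
static void maybe_quicken_putfield(unsigned char* bcp,
                                   unsigned char fast_variant,
                                   unsigned char cached_put_code) {
  if (cached_put_code == 0) return;   // unresolved: leave the bytecode stream alone
  bcp[0] = fast_variant;              // the st_b(bc_reg, at_bcp(0)) before L_patch_done
}

The breakpoint check further down takes the runtime path (set_original_bytecode_at) instead, so an installed _breakpoint byte is never overwritten directly.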
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); + __ addi_d(bc_reg, R0, bc); + __ beq(tmp_reg, R0, L_patch_done); + } + break; + default: + assert(byte_no == -1, "sanity"); + // the pair bytecodes have already done the load. + if (load_bc_into_bc_reg) { + __ li(bc_reg, bc); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + // if a breakpoint is present we can't rewrite the stream directly + __ ld_bu(tmp_reg, at_bcp(0)); + __ li(AT, Bytecodes::_breakpoint); + __ bne(tmp_reg, AT, L_fast_patch); + + __ get_method(tmp_reg); + // Let breakpoint table handling rewrite to quicker bytecode + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); + + __ b(L_patch_done); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Label L_okay; + __ ld_bu(tmp_reg, at_bcp(0)); + __ li(AT, (int)Bytecodes::java_code(bc)); + __ beq(tmp_reg, AT, L_okay); + __ beq(tmp_reg, bc_reg, L_patch_done); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // patch bytecode + __ st_b(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} + + +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() { + transition(vtos, atos); + __ move(FSR, R0); +} + +void TemplateTable::iconst(int value) { + transition(vtos, itos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ li(FSR, value); + } +} + +void TemplateTable::lconst(int value) { + transition(vtos, ltos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ li(FSR, value); + } +} + +void TemplateTable::fconst(int value) { + transition(vtos, ftos); + switch( value ) { + case 0: __ movgr2fr_w(FSF, R0); return; + case 1: __ addi_d(AT, R0, 1); break; + case 2: __ addi_d(AT, R0, 2); break; + default: ShouldNotReachHere(); + } + __ movgr2fr_w(FSF, AT); + __ ffint_s_w(FSF, FSF); +} + +void TemplateTable::dconst(int value) { + transition(vtos, dtos); + switch( value ) { + case 0: __ movgr2fr_d(FSF, R0); + return; + case 1: __ addi_d(AT, R0, 1); + __ movgr2fr_d(FSF, AT); + __ ffint_d_w(FSF, FSF); + break; + default: ShouldNotReachHere(); + } +} + +void TemplateTable::bipush() { + transition(vtos, itos); + __ ld_b(FSR, at_bcp(1)); +} + +void TemplateTable::sipush() { + transition(vtos, itos); + __ ld_b(FSR, BCP, 1); + __ ld_bu(AT, BCP, 2); + __ slli_d(FSR, FSR, 8); + __ orr(FSR, FSR, AT); +} + +// T1 : tags +// T2 : index +// T3 : cpool +// T8 : tag +void TemplateTable::ldc(bool wide) { + transition(vtos, vtos); + Label call_ldc, notFloat, notClass, Done; + // get index in cpool + if (wide) { + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + } else { + __ ld_bu(T2, at_bcp(1)); + } + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + __ add_d(AT, T1, T2); + __ ld_b(T1, AT, tags_offset); + if(os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + } + //now T1 is the tag + + // unresolved class - get the resolved class + __ addi_d(AT, T1, - JVM_CONSTANT_UnresolvedClass); + __ beq(AT, R0, call_ldc); + + // unresolved class in error (resolution failed) - call into 
runtime + // so that the same error from first resolution attempt is thrown. + __ addi_d(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); + __ beq(AT, R0, call_ldc); + + // resolved class - need to call vm to get java mirror of the class + __ addi_d(AT, T1, - JVM_CONSTANT_Class); + __ slli_d(T2, T2, Address::times_8); + __ bne(AT, R0, notClass); + + __ bind(call_ldc); + __ li(A1, wide); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); + //__ push(atos); + __ addi_d(SP, SP, - Interpreter::stackElementSize); + __ st_d(FSR, SP, 0); + __ b(Done); + + __ bind(notClass); + __ addi_d(AT, T1, -JVM_CONSTANT_Float); + __ bne(AT, R0, notFloat); + // ftos + __ add_d(AT, T3, T2); + __ fld_s(FSF, AT, base_offset); + //__ push_f(); + __ addi_d(SP, SP, - Interpreter::stackElementSize); + __ fst_s(FSF, SP, 0); + __ b(Done); + + __ bind(notFloat); +#ifdef ASSERT + { + Label L; + __ addi_d(AT, T1, -JVM_CONSTANT_Integer); + __ beq(AT, R0, L); + __ stop("unexpected tag type in ldc"); + __ bind(L); + } +#endif + // itos JVM_CONSTANT_Integer only + __ add_d(T0, T3, T2); + __ ld_w(FSR, T0, base_offset); + __ push(itos); + __ bind(Done); +} + +// Fast path for caching oop constants. +void TemplateTable::fast_aldc(bool wide) { + transition(vtos, atos); + + Register result = FSR; + Register tmp = SSR; + int index_size = wide ? sizeof(u2) : sizeof(u1); + + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (String, MethodType, etc.) + assert_different_registers(result, tmp); + __ get_cache_index_at_bcp(tmp, 1, index_size); + __ load_resolved_reference_at_index(result, tmp); + __ bne(result, R0, resolved); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + // first time invocation - must resolve first + int i = (int)bytecode(); + __ li(tmp, i); + __ call_VM(result, entry, tmp); + + __ bind(resolved); + + if (VerifyOops) { + __ verify_oop(result); + } +} + + +// used register: T2, T3, T1 +// T2 : index +// T3 : cpool +// T1 : tag +void TemplateTable::ldc2_w() { + transition(vtos, vtos); + Label Long, Done; + + // get index in cpool + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type in T1 + __ add_d(AT, T1, T2); + __ ld_b(T1, AT, tags_offset); + + __ addi_d(AT, T1, - JVM_CONSTANT_Double); + __ slli_d(T2, T2, Address::times_8); + __ bne(AT, R0, Long); + + // dtos + __ add_d(AT, T3, T2); + __ fld_d(FSF, AT, base_offset); + __ push(dtos); + __ b(Done); + + // ltos + __ bind(Long); + __ add_d(AT, T3, T2); + __ ld_d(FSR, AT, base_offset); + __ push(ltos); + + __ bind(Done); +} + +// we compute the actual local variable address here +void TemplateTable::locals_index(Register reg, int offset) { + __ ld_bu(reg, at_bcp(offset)); + __ slli_d(reg, reg, Address::times_8); + __ sub_d(reg, LVP, reg); +} + +// this method will do bytecode folding of the two form: +// iload iload iload caload +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::iload() { + transition(vtos, itos); + if (RewriteFrequentPairs) { + Label rewrite, done; + // get the next bytecode in T2 + __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. 
Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ li(AT, Bytecodes::_iload); + __ beq(AT, T2, done); + + __ li(T3, Bytecodes::_fast_iload2); + __ li(AT, Bytecodes::_fast_iload); + __ beq(AT, T2, rewrite); + + // if _caload, rewrite to fast_icaload + __ li(T3, Bytecodes::_fast_icaload); + __ li(AT, Bytecodes::_caload); + __ beq(AT, T2, rewrite); + + // rewrite so iload doesn't check again. + __ li(T3, Bytecodes::_fast_iload); + + // rewrite + // T3 : fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, T3, T2, false); + __ bind(done); + } + + // Get the local value into tos + locals_index(T2); + __ ld_w(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload2() { + transition(vtos, itos); + locals_index(T2); + __ ld_w(FSR, T2, 0); + __ push(itos); + locals_index(T2, 3); + __ ld_w(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload() { + transition(vtos, itos); + locals_index(T2); + __ ld_w(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::lload() { + transition(vtos, ltos); + locals_index(T2); + __ ld_d(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::fload() { + transition(vtos, ftos); + locals_index(T2); + __ fld_s(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::dload() { + transition(vtos, dtos); + locals_index(T2); + __ fld_d(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::aload() { + transition(vtos, atos); + locals_index(T2); + __ ld_d(FSR, T2, 0); +} + +void TemplateTable::locals_index_wide(Register reg) { + __ get_unsigned_2_byte_index_at_bcp(reg, 2); + __ slli_d(reg, reg, Address::times_8); + __ sub_d(reg, LVP, reg); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(T2); + __ ld_d(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_lload() { + transition(vtos, ltos); + locals_index_wide(T2); + __ ld_d(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_fload() { + transition(vtos, ftos); + locals_index_wide(T2); + __ fld_s(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_dload() { + transition(vtos, dtos); + locals_index_wide(T2); + __ fld_d(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_aload() { + transition(vtos, atos); + locals_index_wide(T2); + __ ld_d(FSR, T2, 0); +} + +// we use A2 as the regiser for index, BE CAREFUL! 
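+// (index_check_without_pop below moves the offending index into A2 before
+// jumping to Interpreter::_throw_ArrayIndexOutOfBoundsException_entry,
+// which expects it there.)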
+// we dont use our tge 29 now, for later optimization +void TemplateTable::index_check(Register array, Register index) { + // Pop ptr into array + __ pop_ptr(array); + index_check_without_pop(array, index); +} + +void TemplateTable::index_check_without_pop(Register array, Register index) { + // destroys A2 + // check array + __ null_check(array, arrayOopDesc::length_offset_in_bytes()); + + // sign extend since tos (index) might contain garbage in upper bits + __ slli_w(index, index, 0); + + // check index + Label ok; + __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); + __ bltu(index, AT, ok); + + //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 + if (A2 != index) __ move(A2, index); + __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ bind(ok); +} + +void TemplateTable::iaload() { + transition(itos, itos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, 1); + __ ld_w(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); +} + +void TemplateTable::laload() { + transition(itos, ltos); + index_check(SSR, FSR); + __ alsl_d(AT, FSR, SSR, Address::times_8 - 1); + __ ld_d(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG)); +} + +void TemplateTable::faload() { + transition(itos, ftos); + index_check(SSR, FSR); + __ shl(FSR, 2); + __ add_d(FSR, SSR, FSR); + __ fld_s(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); +} + +void TemplateTable::daload() { + transition(itos, dtos); + index_check(SSR, FSR); + __ alsl_d(AT, FSR, SSR, 2); + __ fld_d(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); +} + +void TemplateTable::aaload() { + transition(itos, atos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, (UseCompressedOops ? Address::times_4 : Address::times_8) - 1); + //add for compressedoops + __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +void TemplateTable::baload() { + transition(itos, itos); + index_check(SSR, FSR); + __ add_d(FSR, SSR, FSR); + __ ld_b(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); +} + +void TemplateTable::caload() { + transition(itos, itos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); + __ ld_hu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + +// iload followed by caload frequent pair +// used register : T2 +// T2 : index +void TemplateTable::fast_icaload() { + transition(vtos, itos); + // load index out of locals + locals_index(T2); + __ ld_w(FSR, T2, 0); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, 0); + __ ld_hu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + +void TemplateTable::saload() { + transition(itos, itos); + index_check(SSR, FSR); + __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); + __ ld_h(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); +} + +void TemplateTable::iload(int n) { + transition(vtos, itos); + __ ld_w(FSR, iaddress(n)); +} + +void TemplateTable::lload(int n) { + transition(vtos, ltos); + __ ld_d(FSR, laddress(n)); +} + +void TemplateTable::fload(int n) { + transition(vtos, ftos); + __ fld_s(FSF, faddress(n)); +} + +void TemplateTable::dload(int n) { + transition(vtos, dtos); + __ fld_d(FSF, laddress(n)); +} + +void TemplateTable::aload(int n) { + transition(vtos, atos); + __ ld_d(FSR, aaddress(n)); +} + +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::aload_0() { + transition(vtos, atos); + // According to bytecode histograms, the pairs: + // + // _aload_0, _fast_igetfield + // _aload_0, _fast_agetfield + // _aload_0, 
_fast_fgetfield + // + // occur frequently. If RewriteFrequentPairs is set, the (slow) + // _aload_0 bytecode checks if the next bytecode is either + // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then + // rewrites the current bytecode into a pair bytecode; otherwise it + // rewrites the current bytecode into _fast_aload_0 that doesn't do + // the pair check anymore. + // + // Note: If the next bytecode is _getfield, the rewrite must be + // delayed, otherwise we may miss an opportunity for a pair. + // + // Also rewrite frequent pairs + // aload_0, aload_1 + // aload_0, iload_1 + // These bytecodes with a small amount of code are most profitable + // to rewrite + if (RewriteFrequentPairs) { + Label rewrite, done; + // get the next bytecode in T2 + __ ld_bu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // do actual aload_0 + aload(0); + + // if _getfield then wait with rewrite + __ li(AT, Bytecodes::_getfield); + __ beq(AT, T2, done); + + // if _igetfield then reqrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_iaccess_0); + __ li(AT, Bytecodes::_fast_igetfield); + __ beq(AT, T2, rewrite); + + // if _agetfield then reqrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_aaccess_0); + __ li(AT, Bytecodes::_fast_agetfield); + __ beq(AT, T2, rewrite); + + // if _fgetfield then reqrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_faccess_0); + __ li(AT, Bytecodes::_fast_fgetfield); + __ beq(AT, T2, rewrite); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ li(T3, Bytecodes::_fast_aload_0); + + // rewrite + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, T3, T2, false); + + __ bind(done); + } else { + aload(0); + } +} + +void TemplateTable::istore() { + transition(itos, vtos); + locals_index(T2); + __ st_w(FSR, T2, 0); +} + +void TemplateTable::lstore() { + transition(ltos, vtos); + locals_index(T2); + __ st_d(FSR, T2, -wordSize); +} + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(T2); + __ fst_s(FSF, T2, 0); +} + +void TemplateTable::dstore() { + transition(dtos, vtos); + locals_index(T2); + __ fst_d(FSF, T2, -wordSize); +} + +void TemplateTable::astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index(T2); + __ st_d(FSR, T2, 0); +} + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(FSR); + locals_index_wide(T2); + __ st_d(FSR, T2, 0); +} + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(FSR); + locals_index_wide(T2); + __ st_d(FSR, T2, -wordSize); +} + +void TemplateTable::wide_fstore() { + wide_istore(); +} + +void TemplateTable::wide_dstore() { + wide_lstore(); +} + +void TemplateTable::wide_astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index_wide(T2); + __ st_d(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(SSR); // T2: array SSR: index + index_check(T2, SSR); // prefer index in SSR + __ slli_d(SSR, SSR, Address::times_4); + __ add_d(T2, T2, SSR); + __ st_w(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT)); +} + + + +// used register 
T2, T3 +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i (T2); + index_check(T3, T2); + __ slli_d(T2, T2, Address::times_8); + __ add_d(T3, T3, T2); + __ st_d(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG)); +} + +// used register T2 +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(SSR); + index_check(T2, SSR); + __ slli_d(SSR, SSR, Address::times_4); + __ add_d(T2, T2, SSR); + __ fst_s(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); +} + +// used register T2, T3 +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i (T2); + index_check(T3, T2); + __ slli_d(T2, T2, Address::times_8); + __ add_d(T3, T3, T2); + __ fst_d(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); +} + +// used register : T2, T3, T8 +// T2 : array +// T3 : subklass +// T8 : supklass +void TemplateTable::aastore() { + Label is_null, ok_is_subtype, done; + transition(vtos, vtos); + // stack: ..., array, index, value + __ ld_d(FSR, at_tos()); // Value + __ ld_w(SSR, at_tos_p1()); // Index + __ ld_d(T2, at_tos_p2()); // Array + + // index_check(T2, SSR); + index_check_without_pop(T2, SSR); + // do array store check - check for NULL value first + __ beq(FSR, R0, is_null); + + // Move subklass into T3 + //add for compressedoops + __ load_klass(T3, FSR); + // Move superklass into T8 + //add for compressedoops + __ load_klass(T8, T2); + __ ld_d(T8, Address(T8, ObjArrayKlass::element_klass_offset())); + // Compress array+index*4+12 into a single register. T2 + __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); + __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + // Generate subtype check. + // Superklass in T8. Subklass in T3. + __ gen_subtype_check(T8, T3, ok_is_subtype); + // Come here on failure + // object is at FSR + __ jmp(Interpreter::_throw_ArrayStoreException_entry); + // Come here on success + __ bind(ok_is_subtype); + do_oop_store(_masm, Address(T2, 0), FSR, _bs->kind(), true); + __ b(done); + + // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] + __ bind(is_null); + __ profile_null_seen(T4); + __ alsl_d(T2, SSR, T2, (UseCompressedOops? Address::times_4 : Address::times_8) - 1); + do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, _bs->kind(), true); + + __ bind(done); + __ addi_d(SP, SP, 3 * Interpreter::stackElementSize); +} + +void TemplateTable::bastore() { + transition(itos, vtos); + __ pop_i(SSR); + index_check(T2, SSR); + + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. 
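+  // The boolean diffbit in the klass layout helper distinguishes boolean
+  // arrays from byte arrays; when it is set, the value is masked to 0 or 1
+  // before the store.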
+ __ load_klass(T4, T2); + __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); + + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ li(AT, diffbit); + + Label L_skip; + __ andr(AT, T4, AT); + __ beq(AT, R0, L_skip); + __ andi(FSR, FSR, 0x1); + __ bind(L_skip); + + __ add_d(SSR, T2, SSR); + __ st_b(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); +} + +void TemplateTable::castore() { + transition(itos, vtos); + __ pop_i(SSR); + index_check(T2, SSR); + __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); + __ st_h(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + +void TemplateTable::sastore() { + castore(); +} + +void TemplateTable::istore(int n) { + transition(itos, vtos); + __ st_w(FSR, iaddress(n)); +} + +void TemplateTable::lstore(int n) { + transition(ltos, vtos); + __ st_d(FSR, laddress(n)); +} + +void TemplateTable::fstore(int n) { + transition(ftos, vtos); + __ fst_s(FSF, faddress(n)); +} + +void TemplateTable::dstore(int n) { + transition(dtos, vtos); + __ fst_d(FSF, laddress(n)); +} + +void TemplateTable::astore(int n) { + transition(vtos, vtos); + __ pop_ptr(FSR); + __ st_d(FSR, aaddress(n)); +} + +void TemplateTable::pop() { + transition(vtos, vtos); + __ addi_d(SP, SP, Interpreter::stackElementSize); +} + +void TemplateTable::pop2() { + transition(vtos, vtos); + __ addi_d(SP, SP, 2 * Interpreter::stackElementSize); +} + +void TemplateTable::dup() { + transition(vtos, vtos); + // stack: ..., a + __ load_ptr(0, FSR); + __ push_ptr(FSR); + // stack: ..., a, a +} + +// blows FSR +void TemplateTable::dup_x1() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(0, FSR); // load b + __ load_ptr(1, A5); // load a + __ store_ptr(1, FSR); // store b + __ store_ptr(0, A5); // store a + __ push_ptr(FSR); // push b + // stack: ..., b, a, b +} + +// blows FSR +void TemplateTable::dup_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, FSR); // load c + __ load_ptr(2, A5); // load a + __ store_ptr(2, FSR); // store c in a + __ push_ptr(FSR); // push c + // stack: ..., c, b, c, c + __ load_ptr(2, FSR); // load b + __ store_ptr(2, A5); // store a in b + // stack: ..., c, a, c, c + __ store_ptr(1, FSR); // store b in c + // stack: ..., c, a, b, c +} + +// blows FSR +void TemplateTable::dup2() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(1, FSR); // load a + __ push_ptr(FSR); // push a + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + // stack: ..., a, b, a, b +} + +// blows FSR +void TemplateTable::dup2_x1() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, T2); // load c + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + __ push_ptr(T2); // push c + // stack: ..., a, b, c, b, c + __ store_ptr(3, T2); // store c in b + // stack: ..., a, c, c, b, c + __ load_ptr(4, T2); // load a + __ store_ptr(2, T2); // store a in 2nd c + // stack: ..., a, c, a, b, c + __ store_ptr(4, FSR); // store b in a + // stack: ..., b, c, a, b, c + + // stack: ..., b, c, a, b, c +} + +// blows FSR, SSR +void TemplateTable::dup2_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c, d + // stack: ..., a, b, c, d + __ load_ptr(0, T2); // load d + __ load_ptr(1, FSR); // load c + __ push_ptr(FSR); // push c + __ push_ptr(T2); // push d + // stack: ..., a, b, c, d, c, d + __ load_ptr(4, FSR); // load b + __ store_ptr(2, FSR); // store b in d + __ store_ptr(4, T2); // store d in b + // stack: ..., a, d, c, b, c, d + __ load_ptr(5, T2); // load a + __ load_ptr(3, FSR); // load c + __ store_ptr(3, 
T2); // store a in c + __ store_ptr(5, FSR); // store c in a + // stack: ..., c, d, a, b, c, d + + // stack: ..., c, d, a, b, c, d +} + +// blows FSR +void TemplateTable::swap() { + transition(vtos, vtos); + // stack: ..., a, b + + __ load_ptr(1, A5); // load a + __ load_ptr(0, FSR); // load b + __ store_ptr(0, A5); // store a in b + __ store_ptr(1, FSR); // store b in a + + // stack: ..., b, a +} + +void TemplateTable::iop2(Operation op) { + transition(itos, itos); + + __ pop_i(SSR); + switch (op) { + case add : __ add_w(FSR, SSR, FSR); break; + case sub : __ sub_w(FSR, SSR, FSR); break; + case mul : __ mul_w(FSR, SSR, FSR); break; + case _and : __ andr(FSR, SSR, FSR); break; + case _or : __ orr(FSR, SSR, FSR); break; + case _xor : __ xorr(FSR, SSR, FSR); break; + case shl : __ sll_w(FSR, SSR, FSR); break; + case shr : __ sra_w(FSR, SSR, FSR); break; + case ushr : __ srl_w(FSR, SSR, FSR); break; + default : ShouldNotReachHere(); + } +} + +// the result stored in FSR, SSR, +// used registers : T2, T3 +void TemplateTable::lop2(Operation op) { + transition(ltos, ltos); + __ pop_l(T2); + + switch (op) { + case add : __ add_d(FSR, T2, FSR); break; + case sub : __ sub_d(FSR, T2, FSR); break; + case _and: __ andr(FSR, T2, FSR); break; + case _or : __ orr(FSR, T2, FSR); break; + case _xor: __ xorr(FSR, T2, FSR); break; + default : ShouldNotReachHere(); + } +} + +// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, +// the result is 0x80000000 +// the godson2 cpu do the same, so we need not handle this specially like x86 +void TemplateTable::idiv() { + transition(itos, itos); + Label not_zero; + + __ bne(FSR, R0, not_zero); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ bind(not_zero); + + __ pop_i(SSR); + __ div_w(FSR, SSR, FSR); +} + +void TemplateTable::irem() { + transition(itos, itos); + Label not_zero; + __ pop_i(SSR); + + __ bne(FSR, R0, not_zero); + //__ brk(7); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + + __ bind(not_zero); + __ mod_w(FSR, SSR, FSR); +} + +void TemplateTable::lmul() { + transition(ltos, ltos); + __ pop_l(T2); + __ mul_d(FSR, T2, FSR); +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::ldiv() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + + //__ brk(7); //generate FPE + __ jmp(Interpreter::_throw_ArithmeticException_entry); + + __ bind(normal); + __ pop_l(A2); + __ div_d(FSR, A2, FSR); +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::lrem() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + + __ jmp(Interpreter::_throw_ArithmeticException_entry); + + __ bind(normal); + __ pop_l (A2); + + __ mod_d(FSR, A2, FSR); +} + +// result in FSR +// used registers : T0 +void TemplateTable::lshl() { + transition(itos, ltos); + __ pop_l(T0); + __ sll_d(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lshr() { + transition(itos, ltos); + __ pop_l(T0); + __ sra_d(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lushr() { + transition(itos, ltos); + __ pop_l(T0); + __ srl_d(FSR, T0, FSR); +} + +// result in FSF +void TemplateTable::fop2(Operation op) { + transition(ftos, ftos); + switch (op) { + case add: + __ fld_s(fscratch, at_sp()); + __ fadd_s(FSF, fscratch, FSF); + break; + case sub: + __ fld_s(fscratch, at_sp()); + __ fsub_s(FSF, fscratch, FSF); + break; + case mul: + __ fld_s(fscratch, at_sp()); + __ fmul_s(FSF, fscratch, FSF); + break; + 
case div: + __ fld_s(fscratch, at_sp()); + __ fdiv_s(FSF, fscratch, FSF); + break; + case rem: + __ fmov_s(FA1, FSF); + __ fld_s(FA0, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); + break; + default : ShouldNotReachHere(); + } + + __ addi_d(SP, SP, 1 * wordSize); +} + +// result in SSF||FSF +// i dont handle the strict flags +void TemplateTable::dop2(Operation op) { + transition(dtos, dtos); + switch (op) { + case add: + __ fld_d(fscratch, at_sp()); + __ fadd_d(FSF, fscratch, FSF); + break; + case sub: + __ fld_d(fscratch, at_sp()); + __ fsub_d(FSF, fscratch, FSF); + break; + case mul: + __ fld_d(fscratch, at_sp()); + __ fmul_d(FSF, fscratch, FSF); + break; + case div: + __ fld_d(fscratch, at_sp()); + __ fdiv_d(FSF, fscratch, FSF); + break; + case rem: + __ fmov_d(FA1, FSF); + __ fld_d(FA0, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); + break; + default : ShouldNotReachHere(); + } + + __ addi_d(SP, SP, 2 * wordSize); +} + +void TemplateTable::ineg() { + transition(itos, itos); + __ sub_w(FSR, R0, FSR); +} + +void TemplateTable::lneg() { + transition(ltos, ltos); + __ sub_d(FSR, R0, FSR); +} + +void TemplateTable::fneg() { + transition(ftos, ftos); + __ fneg_s(FSF, FSF); +} + +void TemplateTable::dneg() { + transition(dtos, dtos); + __ fneg_d(FSF, FSF); +} + +// used registers : T2 +void TemplateTable::iinc() { + transition(vtos, vtos); + locals_index(T2); + __ ld_w(FSR, T2, 0); + __ ld_b(AT, at_bcp(2)); // get constant + __ add_d(FSR, FSR, AT); + __ st_w(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::wide_iinc() { + transition(vtos, vtos); + locals_index_wide(T2); + __ get_2_byte_integer_at_bcp(FSR, AT, 4); + __ hswap(FSR); + __ ld_w(AT, T2, 0); + __ add_d(FSR, AT, FSR); + __ st_w(FSR, T2, 0); +} + +void TemplateTable::convert() { + // Checking +#ifdef ASSERT + { + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + } +#endif // ASSERT + // Conversion + switch (bytecode()) { + case Bytecodes::_i2l: + __ slli_w(FSR, FSR, 0); + break; + case Bytecodes::_i2f: + __ movgr2fr_w(FSF, FSR); + __ ffint_s_w(FSF, FSF); + break; + case 
Bytecodes::_i2d: + __ movgr2fr_w(FSF, FSR); + __ ffint_d_w(FSF, FSF); + break; + case Bytecodes::_i2b: + __ ext_w_b(FSR, FSR); + break; + case Bytecodes::_i2c: + __ bstrpick_d(FSR, FSR, 15, 0); // truncate upper 56 bits + break; + case Bytecodes::_i2s: + __ ext_w_h(FSR, FSR); + break; + case Bytecodes::_l2i: + __ slli_w(FSR, FSR, 0); + break; + case Bytecodes::_l2f: + __ movgr2fr_d(FSF, FSR); + __ ffint_s_l(FSF, FSF); + break; + case Bytecodes::_l2d: + __ movgr2fr_d(FSF, FSR); + __ ffint_d_l(FSF, FSF); + break; + case Bytecodes::_f2i: + __ ftintrz_w_s(fscratch, FSF); + __ movfr2gr_s(FSR, fscratch); + break; + case Bytecodes::_f2l: + __ ftintrz_l_s(fscratch, FSF); + __ movfr2gr_d(FSR, fscratch); + break; + case Bytecodes::_f2d: + __ fcvt_d_s(FSF, FSF); + break; + case Bytecodes::_d2i: + __ ftintrz_w_d(fscratch, FSF); + __ movfr2gr_s(FSR, fscratch); + break; + case Bytecodes::_d2l: + __ ftintrz_l_d(fscratch, FSF); + __ movfr2gr_d(FSR, fscratch); + break; + case Bytecodes::_d2f: + __ fcvt_s_d(FSF, FSF); + break; + default : + ShouldNotReachHere(); + } +} + +void TemplateTable::lcmp() { + transition(ltos, itos); + + __ pop(T0); + __ pop(R0); + + __ slt(AT, T0, FSR); + __ slt(FSR, FSR, T0); + __ sub_d(FSR, FSR, AT); +} + +void TemplateTable::float_cmp(bool is_float, int unordered_result) { + if (is_float) { + __ fld_s(fscratch, at_sp()); + __ addi_d(SP, SP, 1 * wordSize); + + if (unordered_result < 0) { + __ fcmp_clt_s(FCC0, FSF, fscratch); + __ fcmp_cult_s(FCC1, fscratch, FSF); + } else { + __ fcmp_cult_s(FCC0, FSF, fscratch); + __ fcmp_clt_s(FCC1, fscratch, FSF); + } + } else { + __ fld_d(fscratch, at_sp()); + __ addi_d(SP, SP, 2 * wordSize); + + if (unordered_result < 0) { + __ fcmp_clt_d(FCC0, FSF, fscratch); + __ fcmp_cult_d(FCC1, fscratch, FSF); + } else { + __ fcmp_cult_d(FCC0, FSF, fscratch); + __ fcmp_clt_d(FCC1, fscratch, FSF); + } + } + + __ movcf2gr(FSR, FCC0); + __ movcf2gr(AT, FCC1); + __ sub_d(FSR, FSR, AT); +} + + +// used registers : T3, A7, Rnext +// FSR : return bci, this is defined by the vm specification +// T2 : MDO taken count +// T3 : method +// A7 : offset +// Rnext : next bytecode, this is required by dispatch_base +void TemplateTable::branch(bool is_jsr, bool is_wide) { + __ get_method(T3); + __ profile_taken_branch(A7, T2); // only C2 meaningful + + const ByteSize be_offset = MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset(); + const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset(); + + // Load up T4 with the branch displacement + if (!is_wide) { + __ ld_b(A7, BCP, 1); + __ ld_bu(AT, BCP, 2); + __ slli_d(A7, A7, 8); + __ orr(A7, A7, AT); + } else { + __ get_4_byte_integer_at_bcp(A7, 1); + __ swap(A7); + } + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the non-JSR + // normal-branch stuff occuring below. 
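+  // For jsr the return address is pushed as a bci (not an oop), BCP is
+  // advanced by the displacement and we dispatch immediately, skipping the
+  // branch profiling and OSR logic below.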
+ if (is_jsr) { + // Pre-load the next target bytecode into Rnext + __ ldx_bu(Rnext, BCP, A7); + + // compute return address as bci in FSR + __ addi_d(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); + __ ld_d(AT, T3, in_bytes(Method::const_offset())); + __ sub_d(FSR, FSR, AT); + // Adjust the bcp in BCP by the displacement in A7 + __ add_d(BCP, BCP, A7); + // jsr returns atos that is not an oop + // Push return address + __ push_i(FSR); + // jsr returns vtos + __ dispatch_only_noverify(vtos); + + return; + } + + // Normal (non-jsr) branch handling + + // Adjust the bcp in S0 by the displacement in T4 + __ add_d(BCP, BCP, A7); + + assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; + Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches + // T3: method + // T4: target offset + // BCP: target bcp + // LVP: locals pointer + __ blt(R0, A7, dispatch); // check if forward or backward branch + + // check if MethodCounters exists + Label has_counters; + __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ bne(AT, R0, has_counters); + __ push2(T3, A7); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), + T3); + __ pop2(T3, A7); + __ ld_d(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ beq(AT, R0, dispatch); + __ bind(has_counters); + + if (TieredCompilation) { + Label no_mdo; + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + if (ProfileInterpreter) { + // Are we profiling? + __ ld_d(T0, Address(T3, in_bytes(Method::method_data_offset()))); + __ beq(T0, R0, no_mdo); + // Increment the MDO backedge counter + const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + __ beq(R0, R0, dispatch); + } + __ bind(no_mdo); + // Increment backedge counter in MethodCounters* + __ ld_d(T0, Address(T3, Method::method_counters_offset())); + __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + if (!UseOnStackReplacement) { + __ bind(backedge_counter_overflow); + } + } else { + // increment back edge counter + __ ld_d(T1, T3, in_bytes(Method::method_counters_offset())); + __ ld_w(T0, T1, in_bytes(be_offset)); + __ increment(T0, InvocationCounter::count_increment); + __ st_w(T0, T1, in_bytes(be_offset)); + + // load invocation counter + __ ld_w(T1, T1, in_bytes(inv_offset)); + // buffer bit added, mask no needed + + // dadd backedge counter & invocation counter + __ add_d(T1, T1, T0); + + if (ProfileInterpreter) { + // Test to see if we should create a method data oop + // T1 : backedge counter & invocation counter + if (Assembler::is_simm(InvocationCounter::InterpreterProfileLimit, 12)) { + __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); + __ bne(AT, R0, dispatch); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ ld_w(AT, AT, 0); + __ blt(T1, AT, dispatch); + } + + // if no method data exists, go to profile method + __ test_method_data_pointer(T1, profile_method); + + if (UseOnStackReplacement) { + if 
(Assembler::is_simm(InvocationCounter::InterpreterBackwardBranchLimit, 12)) { + __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); + __ bne(AT, R0, dispatch); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ ld_w(AT, AT, 0); + __ blt(T2, AT, dispatch); + } + + // When ProfileInterpreter is on, the backedge_count comes + // from the methodDataOop, which value does not get reset on + // the call to frequency_counter_overflow(). + // To avoid excessive calls to the overflow routine while + // the method is being compiled, dadd a second test to make + // sure the overflow function is called only once every + // overflow_frequency. + const int overflow_frequency = 1024; + __ andi(AT, T2, overflow_frequency-1); + __ beq(AT, R0, backedge_counter_overflow); + } + } else { + if (UseOnStackReplacement) { + // check for overflow against AT, which is the sum of the counters + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ ld_w(AT, AT, 0); + __ bge(T1, AT, backedge_counter_overflow); + } + } + } + __ bind(dispatch); + } + + // Pre-load the next target bytecode into Rnext + __ ld_bu(Rnext, BCP, 0); + + // continue with the bytecode @ target + // FSR: return bci for jsr's, unused otherwise + // Rnext: target bytecode + // BCP: target bcp + __ dispatch_only(vtos); + + if (UseLoopCounter) { + if (ProfileInterpreter) { + // Out-of-line code to allocate method data oop. + __ bind(profile_method); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ ld_bu(Rnext, BCP, 0); + __ set_method_data_pointer_for_bcp(); + __ b(dispatch); + } + + if (UseOnStackReplacement) { + // invocation counter overflow + __ bind(backedge_counter_overflow); + __ sub_d(A7, BCP, A7); // branch bcp + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), A7); + __ ld_bu(Rnext, BCP, 0); + + // V0: osr nmethod (osr ok) or NULL (osr not possible) + // V1: osr adapter frame return address + // Rnext: target bytecode + // LVP: locals pointer + // BCP: bcp + __ beq(V0, R0, dispatch); + // nmethod may have been invalidated (VM may block upon call_VM return) + __ ld_w(T3, V0, nmethod::entry_bci_offset()); + __ li(AT, InvalidOSREntryBci); + __ beq(AT, T3, dispatch); + // We need to prepare to execute the OSR method. First we must + // migrate the locals and monitors off of the stack. 
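+      // (SharedRuntime::OSR_migration_begin, called below, packs them into
+      // an OSR buffer; its address comes back in V0 and is moved to T0, the
+      // register the OSR entry expects; see the osrBufferPointer note below.)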
+ //V0: osr nmethod (osr ok) or NULL (osr not possible) + //V1: osr adapter frame return address + //Rnext: target bytecode + //LVP: locals pointer + //BCP: bcp + __ move(BCP, V0); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + + // V0 is OSR buffer, move it to expected parameter location + // refer to osrBufferPointer in c1_LIRAssembler_loongarch.cpp + __ move(T0, V0); + + // pop the interpreter frame + __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); + // remove frame anchor + __ leave(); + __ move(LVP, RA); + __ move(SP, A7); + + __ li(AT, -(StackAlignmentInBytes)); + __ andr(SP , SP , AT); + + // push the (possibly adjusted) return address + // refer to osr_entry in c1_LIRAssembler_loongarch.cpp + __ ld_d(AT, BCP, nmethod::osr_entry_point_offset()); + __ jr(AT); + } + } +} + + +void TemplateTable::if_0cmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + case less: + __ bge(FSR, R0, not_taken); + break; + case less_equal: + __ blt(R0, FSR, not_taken); + break; + case greater: + __ bge(R0, FSR, not_taken); + break; + case greater_equal: + __ blt(FSR, R0, not_taken); + break; + } + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_icmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + + __ pop_i(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + case less: + __ bge(SSR, FSR, not_taken); + break; + case less_equal: + __ blt(FSR, SSR, not_taken); + break; + case greater: + __ bge(FSR, SSR, not_taken); + break; + case greater_equal: + __ blt(SSR, FSR, not_taken); + break; + } + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_nullcmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + default: + ShouldNotReachHere(); + } + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + + +void TemplateTable::if_acmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + // __ ld_w(SSR, SP, 0); + __ pop_ptr(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + default: + ShouldNotReachHere(); + } + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci +void TemplateTable::ret() { + transition(vtos, vtos); + + locals_index(T2); + __ ld_d(T2, T2, 0); + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld_d(BCP, T1, in_bytes(Method::const_offset())); + __ add_d(BCP, BCP, T2); + __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci 
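+// wide_ret differs from ret() only in that the local holding the return bci
+// is addressed with a two-byte (wide) index via locals_index_wide.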
+void TemplateTable::wide_ret() { + transition(vtos, vtos); + + locals_index_wide(T2); + __ ld_d(T2, T2, 0); // get return bci, compute return bcp + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld_d(BCP, T1, in_bytes(Method::const_offset())); + __ add_d(BCP, BCP, T2); + __ addi_d(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos); +} + +// used register T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : low +// A7 : high +// Rnext : dest bytecode, required by dispatch_base +void TemplateTable::tableswitch() { + Label default_case, continue_execution; + transition(itos, vtos); + + // align BCP + __ addi_d(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // load lo & hi + __ ld_w(T3, T2, 1 * BytesPerInt); + __ swap(T3); + __ ld_w(A7, T2, 2 * BytesPerInt); + __ swap(A7); + + // check against lo & hi + __ blt(FSR, T3, default_case); + __ blt(A7, FSR, default_case); + + // lookup dispatch offset, in A7 big endian + __ sub_d(FSR, FSR, T3); + __ alsl_d(AT, FSR, T2, Address::times_4 - 1); + __ ld_w(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(FSR, T4, T3); + + __ bind(continue_execution); + __ swap(A7); + __ add_d(BCP, BCP, A7); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos); + + // handle default + __ bind(default_case); + __ profile_switch_default(FSR); + __ ld_w(A7, T2, 0); + __ b(continue_execution); +} + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +// used registers : T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : pair index +// A7 : offset +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + + // swap FSR so we can avoid swapping the table entries + __ swap(FSR); + + // align BCP + __ addi_d(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // set counter + __ ld_w(T3, T2, BytesPerInt); + __ swap(T3); + __ b(loop_entry); + + // table search + __ bind(loop); + // get the entry value + __ alsl_d(AT, T3, T2, Address::times_8 - 1); + __ ld_w(AT, AT, 2 * BytesPerInt); + + // found? + __ beq(FSR, AT, found); + + __ bind(loop_entry); + Label L1; + __ bge(R0, T3, L1); + __ addi_d(T3, T3, -1); + __ b(loop); + __ bind(L1); + __ addi_d(T3, T3, -1); + + // default case + __ profile_switch_default(FSR); + __ ld_w(A7, T2, 0); + __ b(continue_execution); + + // entry found -> get offset + __ bind(found); + __ alsl_d(AT, T3, T2, Address::times_8 - 1); + __ ld_w(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(T3, FSR, T2); + + // continue execution + __ bind(continue_execution); + __ swap(A7); + __ add_d(BCP, BCP, A7); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos); +} + +// used registers : T0, T1, T2, T3, A7, Rnext +// T2 : pairs address(array) +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int n) { + // // Binary search according to "Methodik des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. 
+ // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (inexisting) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // register allocation + const Register array = T2; + const Register i = T3, j = A7; + const Register h = T1; + const Register temp = T0; + const Register key = FSR; + + // setup array + __ addi_d(array, BCP, 3*BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(array, array, AT); + + // initialize i & j + __ move(i, R0); + __ ld_w(j, array, - 1 * BytesPerInt); + // Convert j into native byteordering + __ swap(j); + + // and start + Label entry; + __ b(entry); + + // binary search loop + { + Label loop; + __ bind(loop); + // int h = (i + j) >> 1; + __ add_d(h, i, j); + __ srli_d(h, h, 1); + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // Convert array[h].match to native byte-ordering before compare + __ alsl_d(AT, h, array, Address::times_8 - 1); + __ ld_w(temp, AT, 0 * BytesPerInt); + __ swap(temp); + + __ slt(AT, key, temp); + __ maskeqz(i, i, AT); + __ masknez(temp, h, AT); + __ OR(i, i, temp); + __ masknez(j, j, AT); + __ maskeqz(temp, h, AT); + __ OR(j, j, temp); + + // while (i+1 < j) + __ bind(entry); + __ addi_d(h, i, 1); + __ blt(h, j, loop); + } + + // end of binary search, result index is i (must check again!) + Label default_case; + // Convert array[i].match to native byte-ordering before compare + __ alsl_d(AT, i, array, Address::times_8 - 1); + __ ld_w(temp, AT, 0 * BytesPerInt); + __ swap(temp); + __ bne(key, temp, default_case); + + // entry found -> j = offset + __ alsl_d(AT, i, array, Address::times_8 - 1); + __ ld_w(j, AT, 1 * BytesPerInt); + __ profile_switch_case(i, key, array); + __ swap(j); + + __ add_d(BCP, BCP, j); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos); + + // default case -> j = default offset + __ bind(default_case); + __ profile_switch_default(i); + __ ld_w(j, array, - 2 * BytesPerInt); + __ swap(j); + __ add_d(BCP, BCP, j); + __ ld_bu(Rnext, BCP, 0); + __ dispatch_only(vtos); +} + +void TemplateTable::_return(TosState state) { + transition(state, state); + assert(_desc->calls_vm(), + "inconsistent calls_vm information"); // call in remove_activation + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + __ ld_d(T1, aaddress(0)); + __ load_klass(LVP, T1); + __ ld_w(LVP, LVP, in_bytes(Klass::access_flags_offset())); + __ li(AT, JVM_ACC_HAS_FINALIZER); + __ andr(AT, AT, LVP); + Label skip_register_finalizer; + __ beq(AT, R0, skip_register_finalizer); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::register_finalizer), T1); + __ bind(skip_register_finalizer); + } + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. 
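+  // (narrow() re-truncates FSR according to the method's declared return
+  // type, e.g. boolean, byte, char or short.)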
+ if (state == itos) { + __ narrow(FSR); + } + + __ remove_activation(state, T4); + __ membar(__ StoreStore); + + __ jr(T4); +} + +// we dont shift left 2 bits in get_cache_and_index_at_bcp +// for we always need shift the index we use it. the ConstantPoolCacheEntry +// is 16-byte long, index is the index in +// ConstantPoolCache, so cache + base_offset() + index * 16 is +// the corresponding ConstantPoolCacheEntry +// used registers : T2 +// NOTE : the returned index need also shift left 4 to get the address! +void TemplateTable::resolve_cache_and_index(int byte_no, + Register Rcache, + Register index, + size_t index_size) { + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + const Register temp = A1; + assert_different_registers(Rcache, index); + + Label resolved; + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); + // is resolved? + int i = (int)bytecode(); + __ addi_d(temp, temp, -i); + __ beq(temp, R0, resolved); + // resolve first time through + address entry; + switch (bytecode()) { + case Bytecodes::_getstatic : // fall through + case Bytecodes::_putstatic : // fall through + case Bytecodes::_getfield : // fall through + case Bytecodes::_putfield : + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); + break; + case Bytecodes::_invokevirtual : // fall through + case Bytecodes::_invokespecial : // fall through + case Bytecodes::_invokestatic : // fall through + case Bytecodes::_invokeinterface: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); + break; + case Bytecodes::_invokehandle: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); + break; + case Bytecodes::_invokedynamic: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); + break; + default : + fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); + break; + } + + __ li(temp, i); + __ call_VM(NOREG, entry, temp); + + // Update registers with resolved info + __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); + __ bind(resolved); +} + +// The Rcache and index registers must be set before call +void TemplateTable::load_field_cp_cache_entry(Register obj, + Register cache, + Register index, + Register off, + Register flags, + bool is_static = false) { + assert_different_registers(cache, index, flags, off); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + // Field offset + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_d(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); + // Flags + __ ld_d(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); + + // klass overwrite register + if (is_static) { + __ ld_d(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld_d(obj, Address(obj, mirror_offset)); + + __ verify_oop(obj); + } +} + +// get the method, itable_index and flags of the current invoke +void TemplateTable::load_invoke_cp_cache_entry(int byte_no, + Register method, + Register itable_index, + Register flags, + bool is_invokevirtual, + bool is_invokevfinal, /*unused*/ + bool is_invokedynamic) { + // setup registers + const Register cache = T3; + const Register index = T1; + assert_different_registers(method, flags); + assert_different_registers(method, cache, index); + assert_different_registers(itable_index, flags); + assert_different_registers(itable_index, cache, 
index); + assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); + // determine constant pool cache field offsets + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + ((byte_no == f2_byte) + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + // access constant pool cache fields + const int index_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()); + + size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_d(method, AT, method_offset); + + if (itable_index != NOREG) { + __ ld_d(itable_index, AT, index_offset); + } + __ ld_d(flags, AT, flags_offset); +} + +// The registers cache and index expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, Register index, + bool is_static, bool has_tos) { + // do the JVMTI work here to avoid disturbing the register state below + // We use c_rarg registers here because we want to use the register used in + // the call to the VM + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label L1; + // kill FSR + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + assert_different_registers(cache, index, AT); + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ ld_w(AT, AT, 0); + __ beq(AT, R0, L1); + + __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); + + // cache entry pointer + __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); + __ shl(tmp3, LogBytesPerWord); + __ add_d(tmp2, tmp2, tmp3); + if (is_static) { + __ move(tmp1, R0); + } else { + __ ld_d(tmp1, SP, 0); + __ verify_oop(tmp1); + } + // tmp1: object pointer or NULL + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::pop_and_check_object(Register r) { + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. 
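+  // (verify_oop is expected to expand to checking code only when VerifyOops
+  // is enabled.)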
+ __ verify_oop(r); +} + +// used registers : T1, T2, T3, T1 +// T1 : flags +// T2 : off +// T3 : obj +// T1 : field address +// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the +// following mapping to the TosState states: +// btos: 0 +// ctos: 1 +// stos: 2 +// itos: 3 +// ltos: 4 +// ftos: 5 +// dtos: 6 +// atos: 7 +// vtos: 8 +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::getfield_or_static(int byte_no, bool is_static) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + { + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + if (!is_static) pop_and_check_object(obj); + __ add_d(index, obj, off); + + + Label Done, notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + + // btos + __ ld_b(FSR, index, 0); + __ push(btos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + + __ bind(notByte); + __ li(AT, ztos); + __ bne(flags, AT, notBool); + + // ztos + __ ld_b(FSR, index, 0); + __ push(ztos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + + __ bind(notBool); + __ li(AT, itos); + __ bne(flags, AT, notInt); + + // itos + __ ld_w(FSR, index, 0); + __ push(itos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); + } + __ b(Done); + + __ bind(notInt); + __ li(AT, atos); + __ bne(flags, AT, notObj); + + // atos + //add for compressedoops + __ load_heap_oop(FSR, Address(index, 0)); + __ push(atos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); + } + __ b(Done); + + __ bind(notObj); + __ li(AT, ctos); + __ bne(flags, AT, notChar); + + // ctos + __ ld_hu(FSR, index, 0); + __ push(ctos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); + } + __ b(Done); + + __ bind(notChar); + __ li(AT, stos); + __ bne(flags, AT, notShort); + + // stos + __ ld_h(FSR, index, 0); + __ push(stos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); + } + __ b(Done); + + __ bind(notShort); + __ li(AT, ltos); + __ bne(flags, AT, notLong); + + // ltos + __ ld_d(FSR, index, 0 * wordSize); + __ push(ltos); + + // Don't rewrite to _fast_lgetfield for potential volatile case. 
+ __ b(Done); + + __ bind(notLong); + __ li(AT, ftos); + __ bne(flags, AT, notFloat); + + // ftos + __ fld_s(FSF, index, 0); + __ push(ftos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); + } + __ b(Done); + + __ bind(notFloat); + __ li(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); +#endif + + // dtos + __ fld_d(FSF, index, 0 * wordSize); + __ push(dtos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); + } + +#ifdef ASSERT + __ b(Done); + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} + + +void TemplateTable::getfield(int byte_no) { + getfield_or_static(byte_no, false); +} + +void TemplateTable::getstatic(int byte_no) { + getfield_or_static(byte_no, true); +} + +// The registers cache and index expected to be set before call. +// The function may destroy various registers, just not the cache and index registers. +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { + transition(vtos, vtos); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L1; + //kill AT, T1, T2, T3, T4 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T4; + assert_different_registers(cache, index, tmp4); + + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ ld_w(AT, AT, 0); + __ beq(AT, R0, L1); + + __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); + + if (is_static) { + __ move(tmp1, R0); + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though; it + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. 
+ Label two_word, valsize_known; + __ alsl_d(AT, tmp4, tmp2, Address::times_8 - 1); + __ ld_d(tmp3, AT, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset())); + __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); + + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ move(tmp1, SP); + __ li(AT, ltos); + __ beq(tmp3, AT, two_word); + __ li(AT, dtos); + __ beq(tmp3, AT, two_word); + __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); + __ b(valsize_known); + + __ bind(two_word); + __ addi_d(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); + + __ bind(valsize_known); + // setup object pointer + __ ld_d(tmp1, tmp1, 0 * wordSize); + } + // cache entry pointer + __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); + __ shl(tmp4, LogBytesPerWord); + __ add_d(tmp2, tmp2, tmp4); + // object (tos) + __ move(tmp3, SP); + // tmp1: object pointer set up above (NULL if static) + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +// used registers : T0, T1, T2, T3, T8 +// T1 : flags +// T2 : off +// T3 : obj +// T8 : volatile bit +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::putfield_or_static(int byte_no, bool is_static) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + const Register bc = T3; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + Label Done; + { + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); + __ bind(notVolatile); + } + + Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + + // btos + __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + + __ pop(btos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ st_b(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ztos + __ bind(notByte); + __ li(AT, ztos); + __ bne(flags, AT, notBool); + + __ pop(ztos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ andi(FSR, FSR, 0x1); + __ st_b(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); + } + __ b(Done); + + // itos + __ bind(notBool); + __ li(AT, itos); + __ bne(flags, AT, notInt); + + __ pop(itos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ st_w(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); + } + __ b(Done); + + // atos + __ bind(notInt); + __ li(AT, atos); + __ bne(flags, AT, notObj); + + __ pop(atos); + if (!is_static) { + pop_and_check_object(obj); + } + + do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR, _bs->kind(), false); + + if (!is_static) { + 
patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ctos + __ bind(notObj); + __ li(AT, ctos); + __ bne(flags, AT, notChar); + + __ pop(ctos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ st_h(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); + } + __ b(Done); + + // stos + __ bind(notChar); + __ li(AT, stos); + __ bne(flags, AT, notShort); + + __ pop(stos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ st_h(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ltos + __ bind(notShort); + __ li(AT, ltos); + __ bne(flags, AT, notLong); + + __ pop(ltos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ st_d(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); + } + __ b(Done); + + // ftos + __ bind(notLong); + __ li(AT, ftos); + __ bne(flags, AT, notFloat); + + __ pop(ftos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ fst_s(FSF, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); + } + __ b(Done); + + + // dtos + __ bind(notFloat); + __ li(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); +#endif + + __ pop(dtos); + if (!is_static) { + pop_and_check_object(obj); + } + __ add_d(AT, obj, off); + __ fst_d(FSF, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); + } + +#ifdef ASSERT + __ b(Done); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); + __ bind(notVolatile); + } +} + +void TemplateTable::putfield(int byte_no) { + putfield_or_static(byte_no, false); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +// used registers : T1, T2, T3 +// T1 : cp_entry +// T2 : obj +// T3 : value pointer +void TemplateTable::jvmti_post_fast_field_mod() { + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. 
+ Label L2; + //kill AT, T1, T2, T3, T4 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T4; + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ ld_w(tmp3, AT, 0); + __ beq(tmp3, R0, L2); + __ pop_ptr(tmp1); + __ verify_oop(tmp1); + __ push_ptr(tmp1); + switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ push_i(FSR); break; + case Bytecodes::_fast_dputfield: __ push_d(FSF); break; + case Bytecodes::_fast_fputfield: __ push_f(); break; + case Bytecodes::_fast_lputfield: __ push_l(FSR); break; + default: ShouldNotReachHere(); + } + __ move(tmp3, SP); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); + __ verify_oop(tmp1); + // tmp1: object pointer copied above + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + + switch (bytecode()) { // restore tos values + case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; + case Bytecodes::_fast_dputfield: __ pop_d(); break; + case Bytecodes::_fast_fputfield: __ pop_f(); break; + case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; + } + __ bind(L2); + } +} + +// used registers : T2, T3, T1 +// T2 : index & off & field address +// T3 : cache & obj +// T1 : flags +void TemplateTable::fast_storefield(TosState state) { + transition(state, vtos); + + const Register scratch = T8; + + ByteSize base = ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ membar(__ LoadLoad); + + // test for volatile with T1 + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); + + // replace index with field offset from cache entry + __ ld_d(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); + + Label Done; + { + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, T1); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreStore | __ LoadStore)); + __ bind(notVolatile); + } + + // Get object from stack + pop_and_check_object(T3); + + if (bytecode() != Bytecodes::_fast_aputfield) { + // field address + __ add_d(T2, T3, T2); + } + + // access field + switch (bytecode()) { + case Bytecodes::_fast_zputfield: + __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 + // fall through to bputfield + case Bytecodes::_fast_bputfield: + __ st_b(FSR, T2, 0); + break; + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: + __ st_h(FSR, T2, 0); + break; + case Bytecodes::_fast_iputfield: + __ st_w(FSR, T2, 0); + break; + case Bytecodes::_fast_lputfield: + __ st_d(FSR, T2, 0 * wordSize); + break; + case 
Bytecodes::_fast_fputfield: + __ fst_s(FSF, T2, 0); + break; + case Bytecodes::_fast_dputfield: + __ fst_d(FSF, T2, 0 * wordSize); + break; + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR, _bs->kind(), false); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ StoreLoad | __ StoreStore)); + __ bind(notVolatile); + } +} + +// used registers : T2, T3, T1 +// T3 : cp_entry & cache +// T2 : index & offset +void TemplateTable::fast_accessfield(TosState state) { + transition(atos, state); + + const Register scratch = T8; + + // do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we take + // the time to call into the VM. + Label L1; + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ ld_w(T3, AT, 0); + __ beq(T3, R0, L1); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(T3, T1, 1); + __ move(TSR, FSR); + __ verify_oop(FSR); + // FSR: object pointer copied above + // T3: cache entry pointer + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), + FSR, T3); + __ move(FSR, TSR); + __ bind(L1); + } + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ membar(__ LoadLoad); + + // replace index with field offset from cache entry + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + { + __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + // FSR: object + __ verify_oop(FSR); + __ null_check(FSR); + // field addresses + __ add_d(FSR, FSR, T2); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_bgetfield: + __ ld_b(FSR, FSR, 0); + break; + case Bytecodes::_fast_sgetfield: + __ ld_h(FSR, FSR, 0); + break; + case Bytecodes::_fast_cgetfield: + __ ld_hu(FSR, FSR, 0); + break; + case Bytecodes::_fast_igetfield: + __ ld_w(FSR, FSR, 0); + break; + case Bytecodes::_fast_lgetfield: + __ stop("should not be rewritten"); + break; + case Bytecodes::_fast_fgetfield: + __ fld_s(FSF, FSR, 0); + break; + case Bytecodes::_fast_dgetfield: + __ fld_d(FSF, FSR, 0); + break; + case Bytecodes::_fast_agetfield: + __ load_heap_oop(FSR, Address(FSR, 0)); + __ verify_oop(FSR); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} + +// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 +// used registers : T1, T2, T3, T1 +// T1 : obj & field address +// T2 : off +// T3 : cache +// T1 : index +void TemplateTable::fast_xaccess(TosState state) { + transition(vtos, state); + + const Register scratch = T8; + + // get receiver + __ ld_d(T1, aaddress(0)); + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 2); + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T2, AT, 
in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + { + __ ld_d(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ li(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(MacroAssembler::AnyAny); + __ bind(notVolatile); + } + + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ addi_d(BCP, BCP, 1); + __ null_check(T1); + __ add_d(T1, T1, T2); + + if (state == itos) { + __ ld_w(FSR, T1, 0); + } else if (state == atos) { + __ load_heap_oop(FSR, Address(T1, 0)); + __ verify_oop(FSR); + } else if (state == ftos) { + __ fld_s(FSF, T1, 0); + } else { + ShouldNotReachHere(); + } + __ addi_d(BCP, BCP, -1); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + __ bind(notVolatile); + } +} + + + +//----------------------------------------------------------------------------- +// Calls + +void TemplateTable::count_calls(Register method, Register temp) { + // implemented elsewhere + ShouldNotReachHere(); +} + +// method, index, recv, flags: T1, T2, T3, T1 +// byte_no = 2 for _invokevirtual, 1 else +// T0 : return address +// get the method & index of the invoke, and push the return address of +// the invoke(first word in the frame) +// this address is where the return code jmp to. +// NOTE : this method will set T3&T1 as recv&flags +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. + Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) { + + + // determine flags + const Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + const bool save_flags = (flags != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); + assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); + assert(flags == noreg || flags == T1, "error flags reg."); + assert(recv == noreg || recv == T3, "error recv reg."); + + // setup registers & access constant pool cache + if(recv == noreg) recv = T3; + if(flags == noreg) flags = T1; + assert_different_registers(method, index, recv, flags); + + // save 'interpreter return address' + __ save_bcp(); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + __ li(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); + __ andr(AT, AT, flags); + __ beq(AT, R0, L_no_push); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. 
+    Register tmp = SSR;
+    __ push(tmp);
+    __ move(tmp, index);
+    assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0");
+    __ load_resolved_reference_at_index(index, tmp);
+    __ pop(tmp);
+    __ push(index);  // push appendix (MethodType, CallSite, etc.)
+    __ bind(L_no_push);
+  }
+
+  // load receiver if needed (after appendix is pushed so parameter size is correct)
+  // Note: no return address pushed yet
+  if (load_receiver) {
+    __ li(AT, ConstantPoolCacheEntry::parameter_size_mask);
+    __ andr(recv, flags, AT);
+    // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0.
+    const int no_return_pc_pushed_yet = 0;  // argument slot correction before we push return address
+    const int receiver_is_at_end = -1;      // back off one slot to get receiver
+    Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
+    __ ld_d(recv, recv_addr);
+    __ verify_oop(recv);
+  }
+  if(save_flags) {
+    __ move(BCP, flags);
+  }
+
+  // compute return type
+  __ srli_d(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
+  __ andi(flags, flags, 0xf);
+
+  // Make sure we don't need to mask flags for tos_state_shift after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
+  // load return address
+  {
+    const address table = (address) Interpreter::invoke_return_entry_table_for(code);
+    __ li(AT, (long)table);
+    __ slli_d(flags, flags, LogBytesPerWord);
+    __ add_d(AT, AT, flags);
+    __ ld_d(RA, AT, 0);
+  }
+
+  if (save_flags) {
+    __ move(flags, BCP);
+    __ restore_bcp();
+  }
+}
+
+// used registers : T0, T3, T1, T2
+// T3 : recv; these registers follow the convention set up by prepare_invoke
+// T1 : flags, klass
+// Rmethod : method, index must be Rmethod
+void TemplateTable::invokevirtual_helper(Register index,
+                                         Register recv,
+                                         Register flags) {
+
+  assert_different_registers(index, recv, flags, T2);
+
+  // Test for an invoke of a final method
+  Label notFinal;
+  __ li(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
+  __ andr(AT, flags, AT);
+  __ beq(AT, R0, notFinal);
+
+  Register method = index;  // method must be Rmethod
+  assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention");
+
+  // do the call - the index register actually holds the method to call,
+  // because for a vfinal entry the f2 field holds the Method* directly,
+  // see ConstantPoolCacheEntry::set_method for more info
+
+  __ verify_oop(method);
+
+  // It's final, need a null check here!
+ __ null_check(recv); + + // profile this call + __ profile_final_call(T2); + + // T2: tmp, used for mdp + // method: callee + // T4: tmp + // is_virtual: true + __ profile_arguments_type(T2, method, T4, true); + + __ jump_from_interpreted(method, T2); + + __ bind(notFinal); + + // get receiver klass + __ null_check(recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(T2, recv); + __ verify_oop(T2); + + // profile this call + __ profile_virtual_call(T2, T0, T1); + + // get target methodOop & entry point + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + // T2: receiver + __ alsl_d(AT, index, T2, Address::times_ptr - 1); + //this is a ualign read + __ ld_d(method, AT, base + vtableEntry::method_offset_in_bytes()); + __ profile_arguments_type(T2, method, T4, true); + __ jump_from_interpreted(method, T2); +} + +void TemplateTable::invokevirtual(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); + // now recv & flags in T3, T1 + invokevirtual_helper(Rmethod, T3, T1); +} + +// T4 : entry +// Rmethod : method +void TemplateTable::invokespecial(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3); + // now recv & flags in T3, T1 + __ verify_oop(T3); + __ null_check(T3); + __ profile_call(T4); + + // T8: tmp, used for mdp + // Rmethod: callee + // T4: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T4, false); + + __ jump_from_interpreted(Rmethod, T4); + __ move(T0, T3); +} + +void TemplateTable::invokestatic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG); + __ verify_oop(Rmethod); + + __ profile_call(T4); + + // T8: tmp, used for mdp + // Rmethod: callee + // T4: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T4, false); + + __ jump_from_interpreted(Rmethod, T4); +} + +// i have no idea what to do here, now. for future change. FIXME. +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + __ stop("fast_invokevfinal not used on LoongArch64"); +} + +// used registers : T0, T1, T2, T3, T1, A7 +// T0 : itable, vtable, entry +// T1 : interface +// T3 : receiver +// T1 : flags, klass +// Rmethod : index, method, this is required by interpreter_entry +void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); + //this method will use T1-T4 and T0 + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, T2, Rmethod, T3, T1); + // T2: reference klass + // Rmethod: method + // T3: receiver + // T1: flags + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCacheOop.cpp for details. + // This code isn't produced by javac, but could be produced by + // another compliant java compiler. + Label notMethod; + __ li(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); + __ andr(AT, T1, AT); + __ beq(AT, R0, notMethod); + + invokevirtual_helper(Rmethod, T3, T1); + __ bind(notMethod); + // Get receiver klass into T1 - also a null check + //add for compressedoops + __ load_klass(T1, T3); + __ verify_oop(T1); + + Label no_such_interface, no_such_method; + + // Receiver subtype check against REFC. + // Superklass in T2. Subklass in T1. 
+ __ lookup_interface_method(// inputs: rec. class, interface, itable index + T1, T2, noreg, + // outputs: scan temp. reg, scan temp. reg + T0, FSR, + no_such_interface, + /*return_method=*/false); + + // profile this call + __ profile_virtual_call(T1, T0, FSR); + + // Get declaring interface class from method, and itable index + __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); + __ ld_w(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); + __ addi_d(Rmethod, Rmethod, (-1) * Method::itable_index_max); + __ sub_w(Rmethod, R0, Rmethod); + + __ lookup_interface_method(// inputs: rec. class, interface, itable index + T1, T2, Rmethod, + // outputs: method, scan temp. reg + Rmethod, T0, + no_such_interface); + + // Rmethod: Method* to call + // T3: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ beq(Rmethod, R0, no_such_method); + + __ profile_arguments_type(T1, Rmethod, T0, true); + + // do the call + // T3: receiver + // Rmethod: Method* + __ jump_from_interpreted(Rmethod, T1); + __ should_not_reach_here(); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + __ bind(no_such_interface); + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); +} + + +void TemplateTable::invokehandle(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + const Register T2_method = Rmethod; + const Register FSR_mtype = FSR; + const Register T3_recv = T3; + + if (!EnableInvokeDynamic) { + // rewriter does not generate this bytecode + __ should_not_reach_here(); + return; + } + + prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); + //??__ verify_method_ptr(T2_method); + __ verify_oop(T3_recv); + __ null_check(T3_recv); + + // T4: MethodType object (from cpool->resolved_references[f1], if necessary) + // T2_method: MH.invokeExact_MT method (from f2) + + // Note: T4 is already pushed (if necessary) by prepare_invoke + + // FIXME: profile the LambdaForm also + __ profile_final_call(T4); + + // T8: tmp, used for mdp + // T2_method: callee + // T4: tmp + // is_virtual: true + __ profile_arguments_type(T8, T2_method, T4, true); + + __ jump_from_interpreted(T2_method, T4); +} + + void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + if (!EnableInvokeDynamic) { + // We should not encounter this bytecode if !EnableInvokeDynamic. + // The verifier will stop it. However, if we get past the verifier, + // this will stop the thread in a reasonable way, without crashing the JVM. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeError)); + // the call_VM checks for exception, so we should never return here. 
+ __ should_not_reach_here(); + return; + } + + const Register T2_callsite = T2; + + prepare_invoke(byte_no, Rmethod, T2_callsite); + + // T2: CallSite object (from cpool->resolved_references[f1]) + // Rmethod: MH.linkToCallSite method (from f2) + + // Note: T2_callsite is already pushed by prepare_invoke + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(T4); + + // T8: tmp, used for mdp + // Rmethod: callee + // T4: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T4, false); + + __ verify_oop(T2_callsite); + + __ jump_from_interpreted(Rmethod, T4); + } + +//----------------------------------------------------------------------------- +// Allocation +// T1 : tags & buffer end & thread +// T2 : object end +// T3 : klass +// T1 : object size +// A1 : cpool +// A2 : cp index +// return object in FSR +void TemplateTable::_new() { + transition(vtos, atos); + __ get_unsigned_2_byte_index_at_bcp(A2, 1); + + Label slow_case; + Label done; + Label initialize_header; + Label initialize_object; // including clearing the fields + Label allocate_shared; + + // get InstanceKlass in T3 + __ get_cpool_and_tags(A1, T1); + + __ alsl_d(AT, A2, A1, Address::times_8 - 1); + __ ld_d(T3, AT, sizeof(ConstantPool)); + + // make sure the class we're about to instantiate has been resolved. + // Note: slow_case does a pop of stack, which is why we loaded class/pushed above + const int tags_offset = Array::base_offset_in_bytes(); + __ add_d(T1, T1, A2); + __ ld_b(AT, T1, tags_offset); + if(os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + } + __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); + __ bne(AT, R0, slow_case); + + // make sure klass is initialized & doesn't have finalizer + // make sure klass is fully initialized + __ ld_hu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); + __ addi_d(AT, T1, - (int)InstanceKlass::fully_initialized); + __ bne(AT, R0, slow_case); + + // has_finalizer + __ ld_w(T0, T3, in_bytes(Klass::layout_helper_offset()) ); + __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); + __ bne(AT, R0, slow_case); + + // Allocate the instance + // 1) Try to allocate in the TLAB + // 2) if fail and the object is large allocate in the shared Eden + // 3) if the above fails (or is not applicable), go to a slow case + // (creates a new TLAB, etc.) + + const bool allow_shared_alloc = + Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode; + +#ifndef OPT_THREAD + const Register thread = T8; + if (UseTLAB || allow_shared_alloc) { + __ get_thread(thread); + } +#else + const Register thread = TREG; +#endif + + if (UseTLAB) { + // get tlab_top + __ ld_d(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); + // get tlab_end + __ ld_d(AT, thread, in_bytes(JavaThread::tlab_end_offset())); + __ add_d(T2, FSR, T0); + __ blt(AT, T2, allow_shared_alloc ? 
allocate_shared : slow_case); + __ st_d(T2, thread, in_bytes(JavaThread::tlab_top_offset())); + + if (ZeroTLAB) { + // the fields have been already cleared + __ beq(R0, R0, initialize_header); + } else { + // initialize both the header and fields + __ beq(R0, R0, initialize_object); + } + } + + // Allocation in the shared Eden , if allowed + // T0 : instance size in words + if(allow_shared_alloc){ + __ bind(allocate_shared); + + Label done, retry; + Address heap_top(T1); + __ li(T1, (long)Universe::heap()->top_addr()); + __ ld_d(FSR, heap_top); + + __ bind(retry); + __ li(AT, (long)Universe::heap()->end_addr()); + __ ld_d(AT, AT, 0); + __ add_d(T2, FSR, T0); + __ blt(AT, T2, slow_case); + + // Compare FSR with the top addr, and if still equal, store the new + // top addr in T2 at the address of the top addr pointer. Sets AT if was + // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. + // + // FSR: object begin + // T2: object end + // T0: instance size in words + + // if someone beat us on the allocation, try again, otherwise continue + __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); + + __ bind(done); + __ incr_allocated_bytes(thread, T0, 0); + } + + if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. + __ bind(initialize_object); + __ li(AT, - sizeof(oopDesc)); + __ add_d(T0, T0, AT); + __ beq(T0, R0, initialize_header); + + // initialize remaining object fields: T0 is a multiple of 2 + { + Label loop; + __ add_d(T1, FSR, T0); + __ addi_d(T1, T1, -oopSize); + + __ bind(loop); + __ st_d(R0, T1, sizeof(oopDesc) + 0 * oopSize); + Label L1; + __ beq(T1, FSR, L1); //dont clear header + __ addi_d(T1, T1, -oopSize); + __ b(loop); + __ bind(L1); + __ addi_d(T1, T1, -oopSize); + } + + // klass in T3, + // initialize object header only. 
+  __ bind(initialize_header);
+  if (UseBiasedLocking) {
+    __ ld_d(AT, T3, in_bytes(Klass::prototype_header_offset()));
+    __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes ());
+  } else {
+    __ li(AT, (long)markOopDesc::prototype());
+    __ st_d(AT, FSR, oopDesc::mark_offset_in_bytes());
+  }
+
+  __ store_klass_gap(FSR, R0);
+  __ store_klass(FSR, T3);
+
+  {
+    SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0);
+    // Trigger dtrace event for fastpath
+    __ push(atos);
+    __ call_VM_leaf(
+        CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR);
+    __ pop(atos);
+
+  }
+  __ b(done);
+  }
+
+  // slow case
+  __ bind(slow_case);
+  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2);
+
+  // continue
+  __ bind(done);
+  __ membar(__ StoreStore);
+}
+
+void TemplateTable::newarray() {
+  transition(itos, atos);
+  __ ld_bu(A1, at_bcp(1));
+  // type, count
+  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR);
+  __ membar(__ StoreStore);
+}
+
+void TemplateTable::anewarray() {
+  transition(itos, atos);
+  __ get_2_byte_integer_at_bcp(A2, AT, 1);
+  __ huswap(A2);
+  __ get_constant_pool(A1);
+  // cp, index, count
+  call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR);
+  __ membar(__ StoreStore);
+}
+
+void TemplateTable::arraylength() {
+  transition(atos, itos);
+  __ null_check(FSR, arrayOopDesc::length_offset_in_bytes());
+  __ ld_w(FSR, FSR, arrayOopDesc::length_offset_in_bytes());
+}
+
+// When gen_subtype_check is invoked: super klass in T3, sub klass in T2, object always in FSR
+// T2 : sub klass
+// T3 : cpool
+// T3 : super klass
+void TemplateTable::checkcast() {
+  transition(atos, atos);
+  Label done, is_null, ok_is_subtype, quicked, resolved;
+  __ beq(FSR, R0, is_null);
+
+  // Get cpool & tags index
+  __ get_cpool_and_tags(T3, T1);
+  __ get_2_byte_integer_at_bcp(T2, AT, 1);
+  __ huswap(T2);
+
+  // See if bytecode has already been quicked
+  __ add_d(AT, T1, T2);
+  __ ld_b(AT, AT, Array<u1>::base_offset_in_bytes());
+  if(os::is_MP()) {
+    __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore));
+  }
+  __ addi_d(AT, AT, - (int)JVM_CONSTANT_Class);
+  __ beq(AT, R0, quicked);
+
+  // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded,
+  // and a GC may move the object in V0 to another place in the heap.
+  // Therefore we must not keep such an object in a register; instead we
+  // save it on the stack, where the GC can update it automatically.
+  // After the call, the (possibly relocated) object is popped back into FSR.
+  //
+  __ push(atos);
+  const Register thread = TREG;
+#ifndef OPT_THREAD
+  __ get_thread(thread);
+#endif
+  call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
+  __ get_vm_result_2(T3, thread);
+  __ pop_ptr(FSR);
+  __ b(resolved);
+
+  // klass already in cp, get superklass in T3
+  __ bind(quicked);
+  __ alsl_d(AT, T2, T3, Address::times_8 - 1);
+  __ ld_d(T3, AT, sizeof(ConstantPool));
+
+  __ bind(resolved);
+
+  // get subklass in T2
+  __ load_klass(T2, FSR);
+  // Superklass in T3. Subklass in T2.
+  __ gen_subtype_check(T3, T2, ok_is_subtype);
+
+  // Come here on failure
+  // object is at FSR
+  __ jmp(Interpreter::_throw_ClassCastException_entry);
+
+  // Come here on success
+  __ bind(ok_is_subtype);
+
+  // Collect counts on whether this check-cast sees NULLs a lot or not.
+ if (ProfileInterpreter) { + __ b(done); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); + } + __ bind(done); +} + +// T3 as cpool, T1 as tags, T2 as index +// object always in FSR, superklass in T3, subklass in T2 +void TemplateTable::instanceof() { + transition(atos, itos); + Label done, is_null, ok_is_subtype, quicked, resolved; + + __ beq(FSR, R0, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(T3, T1); + // get index + __ get_2_byte_integer_at_bcp(T2, AT, 1); + __ hswap(T2); + + // See if bytecode has already been quicked + // quicked + __ add_d(AT, T1, T2); + __ ld_b(AT, AT, Array::base_offset_in_bytes()); + if(os::is_MP()) { + __ membar(Assembler::Membar_mask_bits(__ LoadLoad | __ LoadStore)); + } + __ addi_d(AT, AT, -(int)JVM_CONSTANT_Class); + __ beq(AT, R0, quicked); + + __ push(atos); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(T3, thread); + __ pop_ptr(FSR); + __ b(resolved); + + // get superklass in T3, subklass in T2 + __ bind(quicked); + __ alsl_d(AT, T2, T3, Address::times_8 - 1); + __ ld_d(T3, AT, sizeof(ConstantPool)); + + __ bind(resolved); + // get subklass in T2 + __ load_klass(T2, FSR); + + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); + // Come here on failure + __ move(FSR, R0); + __ b(done); + + // Come here on success + __ bind(ok_is_subtype); + __ li(FSR, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ beq(R0, R0, done); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); + // FSR = 0: obj == NULL or obj is not an instanceof the specified klass + // FSR = 1: obj != NULL and obj is an instanceof the specified klass +} + +//-------------------------------------------------------- +//-------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + // Note: We get here even if we are single stepping.. + // jbug inists on setting breakpoints at every bytecode + // even if we are in single step mode. + + transition(vtos, vtos); + + // get the unpatched byte code + __ get_method(A1); + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::get_original_bytecode_at), + A1, BCP); + __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal + + // post the breakpoint event + __ get_method(A1); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); + + // complete the execution of original bytecode + __ dispatch_only_normal(vtos); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + __ null_check(FSR); + __ jmp(Interpreter::throw_exception_entry()); +} + +//----------------------------------------------------------------------------- +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// [expressions ] <--- SP = expression stack top +// .. +// [expressions ] +// [monitor entry] <--- monitor block top = expression stack bot +// .. +// [monitor entry] +// [frame data ] <--- monitor block bot +// ... 
+// [return addr ] <--- FP + +// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer +// object always in FSR +void TemplateTable::monitorenter() { + transition(atos, vtos); + + // check for NULL object + __ null_check(FSR); + + const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset + * wordSize); + const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); + Label allocated; + + // initialize entry pointer + __ move(c_rarg0, R0); + + // find a free slot in the monitor block (result in c_rarg0) + { + Label entry, loop, exit, next; + __ ld_d(T2, monitor_block_top); + __ addi_d(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + __ b(entry); + + // free slot? + __ bind(loop); + __ ld_d(AT, T2, BasicObjectLock::obj_offset_in_bytes()); + __ bne(AT, R0, next); + __ move(c_rarg0, T2); + + __ bind(next); + __ beq(FSR, AT, exit); + __ addi_d(T2, T2, entry_size); + + __ bind(entry); + __ bne(T3, T2, loop); + __ bind(exit); + } + + __ bne(c_rarg0, R0, allocated); + + // allocate one if there's no free slot + { + Label entry, loop; + // 1. compute new pointers // SP: old expression stack top + __ ld_d(c_rarg0, monitor_block_top); + __ addi_d(SP, SP, -entry_size); + __ addi_d(c_rarg0, c_rarg0, -entry_size); + __ st_d(c_rarg0, monitor_block_top); + __ move(T3, SP); + __ b(entry); + + // 2. move expression stack contents + __ bind(loop); + __ ld_d(AT, T3, entry_size); + __ st_d(AT, T3, 0); + __ addi_d(T3, T3, wordSize); + __ bind(entry); + __ bne(T3, c_rarg0, loop); + } + + __ bind(allocated); + // Increment bcp to point to the next bytecode, + // so exception handling for async. exceptions work correctly. + // The object has already been poped from the stack, so the + // expression stack looks correct. + __ addi_d(BCP, BCP, 1); + __ st_d(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ lock_object(c_rarg0); + // check to make sure this monitor doesn't cause stack overflow after locking + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + // The bcp has already been incremented. Just need to dispatch to next instruction. + + __ dispatch_next(vtos); +} + +// T2 : top +// c_rarg0 : entry +void TemplateTable::monitorexit() { + transition(atos, vtos); + + __ null_check(FSR); + + const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); + Label found; + + // find matching slot + { + Label entry, loop; + __ ld_d(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ addi_d(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + __ b(entry); + + __ bind(loop); + __ ld_d(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ beq(FSR, AT, found); + __ addi_d(c_rarg0, c_rarg0, entry_size); + __ bind(entry); + __ bne(T2, c_rarg0, loop); + } + + // error handling. 
Unlocking was not block-structured + Label end; + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + // call run-time routine + // c_rarg0: points to monitor entry + __ bind(found); + __ move(TSR, FSR); + __ unlock_object(c_rarg0); + __ move(FSR, TSR); + __ bind(end); +} + + +// Wide instructions +void TemplateTable::wide() { + transition(vtos, vtos); + __ ld_bu(Rnext, at_bcp(1)); + __ slli_d(T4, Rnext, Address::times_8); + __ li(AT, (long)Interpreter::_wentry_point); + __ add_d(AT, T4, AT); + __ ld_d(T4, AT, 0); + __ jr(T4); +} + + +void TemplateTable::multianewarray() { + transition(vtos, atos); + // last dim is on top of stack; we want address of first one: + // first_addr = last_addr + (ndims - 1) * wordSize + __ ld_bu(A1, at_bcp(3)); // dimension + __ addi_d(A1, A1, -1); + __ slli_d(A1, A1, Address::times_8); + __ add_d(A1, SP, A1); // now A1 pointer to the count array on the stack + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); + __ ld_bu(AT, at_bcp(3)); + __ slli_d(AT, AT, Address::times_8); + __ add_d(SP, SP, AT); + __ membar(__ AnyAny);//no membar here for aarch64 +} +#endif // !CC_INTERP diff --git a/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp new file mode 100644 index 00000000000..c48d76e0a2a --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/templateTable_loongarch_64.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP +#define CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP + + static void prepare_invoke(int byte_no, + Register method, + Register index = noreg, + Register recv = noreg, + Register flags = noreg + ); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + //static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); + static void volatile_barrier(); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + +#endif // CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP diff --git a/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp new file mode 100644 index 00000000000..7c3ce68010d --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vmStructs_loongarch.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+ \
+  /******************************/ \
+  /* JavaCallWrapper            */ \
+  /******************************/ \
+  /******************************/ \
+  /* JavaFrameAnchor            */ \
+  /******************************/ \
+  volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \
+ \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_STRUCTS_OS_CPU macro (and must */
+  /* be present there) */
+
+
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_TYPES_OS_CPU macro (and must */
+  /* be present there) */
+
+
+#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */
+  /* be present there) */
+
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
+
+  /* NOTE that we do not use the last_entry() macro here; it is used */
+  /* in vmStructs_<os>_<cpu>.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */
+  /* be present there) */
+
+#endif // CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP
diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp
new file mode 100644
index 00000000000..c71f64e132b
--- /dev/null
+++ b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +#include "memory/allocation.inline.hpp" +#include "vm_version_ext_loongarch.hpp" + +// VM_Version_Ext statics +int VM_Version_Ext::_no_of_threads = 0; +int VM_Version_Ext::_no_of_cores = 0; +int VM_Version_Ext::_no_of_sockets = 0; +bool VM_Version_Ext::_initialized = false; +char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; +char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; + +void VM_Version_Ext::initialize_cpu_information(void) { + // do nothing if cpu info has been initialized + if (_initialized) { + return; + } + + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "LoongArch"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "LoongArch %s", cpu_features()); + _initialized = true; +} + +int VM_Version_Ext::number_of_threads(void) { + initialize_cpu_information(); + return _no_of_threads; +} + +int VM_Version_Ext::number_of_cores(void) { + initialize_cpu_information(); + return _no_of_cores; +} + +int VM_Version_Ext::number_of_sockets(void) { + initialize_cpu_information(); + return _no_of_sockets; +} + +const char* VM_Version_Ext::cpu_name(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); + return tmp; +} + +const char* VM_Version_Ext::cpu_description(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); + return tmp; +} diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp new file mode 100644 index 00000000000..682dd9c78ff --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vm_version_ext_loongarch.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP + +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" + +class VM_Version_Ext : public VM_Version { + private: + static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; + static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + + static int _no_of_threads; + static int _no_of_cores; + static int _no_of_sockets; + static bool _initialized; + static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; + static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + + public: + static int number_of_threads(void); + static int number_of_cores(void); + static int number_of_sockets(void); + + static const char* cpu_name(void); + static const char* cpu_description(void); + static void initialize_cpu_information(void); +}; + +#endif // CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp new file mode 100644 index 00000000000..81ea3b230cb --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.cpp @@ -0,0 +1,443 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/java.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "vm_version_loongarch.hpp" +#ifdef TARGET_OS_FAMILY_linux +# include "os_linux.inline.hpp" +#endif + +#include +#include + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +int VM_Version::_cpuFeatures; +unsigned long VM_Version::auxv; +const char* VM_Version::_features_str = ""; +VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; +bool VM_Version::_cpu_info_is_initialized = false; + +static BufferBlob* stub_blob; +static const int stub_size = 600; + +extern "C" { + typedef void (*get_cpu_info_stub_t)(void*); +} +static get_cpu_info_stub_t get_cpu_info_stub = NULL; + + +class VM_Version_StubGenerator: public StubCodeGenerator { + public: + + VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} + + address generate_get_cpu_info() { + assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); + StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); +# define __ _masm-> + + address start = __ pc(); + + __ enter(); + __ push(AT); + __ push(T5); + + __ li(AT, (long)0); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); + + __ li(AT, 1); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); + + __ li(AT, 2); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); + + __ li(AT, 3); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id3_offset())); + + __ li(AT, 4); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id4_offset())); + + __ li(AT, 5); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id5_offset())); + + __ li(AT, 6); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id6_offset())); + + __ li(AT, 10); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id10_offset())); + + __ li(AT, 11); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id11_offset())); + + __ li(AT, 12); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id12_offset())); + + __ li(AT, 13); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id13_offset())); + + __ li(AT, 14); + __ cpucfg(T5, AT); + __ st_w(T5, A0, in_bytes(VM_Version::Loongson_Cpucfg_id14_offset())); + + __ pop(T5); + __ pop(AT); + __ leave(); + __ jr(RA); +# undef __ + return start; + }; +}; + +uint32_t VM_Version::get_feature_flags_by_cpucfg() { + uint32_t result = 0; + if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b00 || _cpuid_info.cpucfg_info_id1.bits.ARCH == 0b01 ) { + result |= CPU_LA32; + } else if (_cpuid_info.cpucfg_info_id1.bits.ARCH == 0b10 ) { + result |= CPU_LA64; + } + if (_cpuid_info.cpucfg_info_id1.bits.UAL != 0) + result |= CPU_UAL; + + if (_cpuid_info.cpucfg_info_id2.bits.FP_CFG != 0) + result |= CPU_FP; + if (_cpuid_info.cpucfg_info_id2.bits.COMPLEX != 0) + result |= CPU_COMPLEX; + if (_cpuid_info.cpucfg_info_id2.bits.CRYPTO != 0) + result |= CPU_CRYPTO; + if (_cpuid_info.cpucfg_info_id2.bits.LBT_X86 != 0) + result |= 
CPU_LBT_X86; + if (_cpuid_info.cpucfg_info_id2.bits.LBT_ARM != 0) + result |= CPU_LBT_ARM; + if (_cpuid_info.cpucfg_info_id2.bits.LBT_MIPS != 0) + result |= CPU_LBT_MIPS; + if (_cpuid_info.cpucfg_info_id2.bits.LAM != 0) + result |= CPU_LAM; + + if (_cpuid_info.cpucfg_info_id3.bits.CCDMA != 0) + result |= CPU_CCDMA; + if (_cpuid_info.cpucfg_info_id3.bits.LLDBAR != 0) + result |= CPU_LLDBAR; + if (_cpuid_info.cpucfg_info_id3.bits.SCDLY != 0) + result |= CPU_SCDLY; + if (_cpuid_info.cpucfg_info_id3.bits.LLEXC != 0) + result |= CPU_LLEXC; + + result |= CPU_ULSYNC; + + return result; +} + +void VM_Version::get_processor_features() { + + clean_cpuFeatures(); + + get_cpu_info_stub(&_cpuid_info); + _cpuFeatures = get_feature_flags_by_cpucfg(); + + _supports_cx8 = true; + + if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { + FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); + } + + auxv = getauxval(AT_HWCAP); + + if (supports_lsx()) { + if (FLAG_IS_DEFAULT(UseLSX)) { + FLAG_SET_DEFAULT(UseLSX, true); + } + } else if (UseLSX) { + warning("LSX instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLSX, false); + } + + if (supports_lasx()) { + if (FLAG_IS_DEFAULT(UseLASX)) { + FLAG_SET_DEFAULT(UseLASX, true); + } + } else if (UseLASX) { + warning("LASX instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLASX, false); + } + + if (UseLASX && !UseLSX) { + warning("LASX instructions depends on LSX, setting UseLASX to false"); + FLAG_SET_DEFAULT(UseLASX, false); + } + +#ifdef COMPILER2 + int max_vector_size = 0; + int min_vector_size = 0; + if (UseLASX) { + max_vector_size = 32; + min_vector_size = 16; + } + else if (UseLSX) { + max_vector_size = 16; + min_vector_size = 16; + } + + if (!FLAG_IS_DEFAULT(MaxVectorSize)) { + if (MaxVectorSize == 0) { + // do nothing + } else if (MaxVectorSize > max_vector_size) { + warning("MaxVectorSize must be at most %i on this platform", max_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } else if (MaxVectorSize < min_vector_size) { + warning("MaxVectorSize must be at least %i or 0 on this platform, setting to: %i", min_vector_size, min_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); + } else if (!is_power_of_2(MaxVectorSize)) { + warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } + } else { + // If default, use highest supported configuration + FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); + } +#endif + + if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 1000); + } + } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 2000); + } + } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 3000); + } + } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 4000); + } + } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 10000); + } + } else { + assert(false, "Should Not Reach Here, what is the cpu type?"); + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 10000); + } + } + + char buf[256]; + + // A note on the _features_string format: + // There are jtreg tests checking the 
_features_string for various properties. + // For some strange reason, these tests require the string to contain + // only _lowercase_ characters. Keep that in mind when being surprised + // about the unusual notation of features - and when adding new ones. + // Features may have one comma at the end. + // Furthermore, use one, and only one, separator space between features. + // Multiple spaces are considered separate tokens, messing up everything. + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " + "0x%lx, fp_ver: %d, lvz_ver: %d, " + "usesynclevel:%d", + (is_la64() ? "la64" : ""), + (is_la32() ? "la32" : ""), + (supports_lsx() ? ", lsx" : ""), + (supports_lasx() ? ", lasx" : ""), + (supports_crypto() ? ", crypto" : ""), + (supports_lam() ? ", am" : ""), + (supports_ual() ? ", ual" : ""), + (supports_lldbar() ? ", lldbar" : ""), + (supports_scdly() ? ", scdly" : ""), + (supports_llexc() ? ", llexc" : ""), + (supports_lbt_x86() ? ", lbt_x86" : ""), + (supports_lbt_arm() ? ", lbt_arm" : ""), + (supports_lbt_mips() ? ", lbt_mips" : ""), + (needs_llsync() ? ", needs_llsync" : ""), + (needs_tgtsync() ? ", needs_tgtsync": ""), + (needs_ulsync() ? ", needs_ulsync": ""), + _cpuid_info.cpucfg_info_id0.bits.PRID, + _cpuid_info.cpucfg_info_id2.bits.FP_VER, + _cpuid_info.cpucfg_info_id2.bits.LVZ_VER, + UseSyncLevel); + _features_str = strdup(buf); + + assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); + assert( is_la64(), "Should be LoongArch64"); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { + FLAG_SET_DEFAULT(AllocatePrefetchLines, 3); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 192); + } + + if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { + FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); + } + + // Basic instructions are used to implement SHA Intrinsics on LA, so sha + // instructions support is not needed. + if (/*supports_crypto()*/ 1) { + if (FLAG_IS_DEFAULT(UseSHA)) { + FLAG_SET_DEFAULT(UseSHA, true); + } + } else if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (UseSHA/* && supports_crypto()*/) { + if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); + } + } else if (UseSHA1Intrinsics) { + warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + } + + if (UseSHA/* && supports_crypto()*/) { + if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); + } + } else if (UseSHA256Intrinsics) { + warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + } + + if (UseSHA512Intrinsics) { + warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA, false); + } + + // Basic instructions are used to implement AES Intrinsics on LA, so AES + // instructions support is not needed. 
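The UseLSX/UseLASX defaulting earlier in get_processor_features() keys off the kernel's auxiliary vector: the code reads getauxval(AT_HWCAP), and supports_lsx()/supports_lasx() test bits 4 and 5, matching the HWCAP_LOONGARCH_LSX/LASX fallback definitions in vm_version_loongarch.hpp. As a standalone illustration only (not part of the patch; the header choice and output format are this sketch's assumptions), the same probe outside the VM could look like this:

  #include <sys/auxv.h>   // getauxval, AT_HWCAP (assumed Linux/glibc environment)
  #include <cstdio>

  #ifndef HWCAP_LOONGARCH_LSX
  #define HWCAP_LOONGARCH_LSX  (1 << 4)   // same fallback value as vm_version_loongarch.hpp
  #endif
  #ifndef HWCAP_LOONGARCH_LASX
  #define HWCAP_LOONGARCH_LASX (1 << 5)
  #endif

  int main() {
    unsigned long hwcap = getauxval(AT_HWCAP);          // kernel-reported CPU features
    std::printf("LSX:  %s\n", (hwcap & HWCAP_LOONGARCH_LSX)  ? "available" : "not available");
    std::printf("LASX: %s\n", (hwcap & HWCAP_LOONGARCH_LASX) ? "available" : "not available");
    return 0;
  }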
+ if (/*supports_crypto()*/ 1) { + if (FLAG_IS_DEFAULT(UseAES)) { + FLAG_SET_DEFAULT(UseAES, true); + } + } else if (UseAES) { + if (!FLAG_IS_DEFAULT(UseAES)) + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + + if (UseAES/* && supports_crypto()*/) { + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + FLAG_SET_DEFAULT(UseAESIntrinsics, true); + } + } else if (UseAESIntrinsics) { + if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + + if (FLAG_IS_DEFAULT(UseCRC32)) { + FLAG_SET_DEFAULT(UseCRC32, true); + } + + if (UseCRC32) { + if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + UseCRC32Intrinsics = true; + } + } + + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } + + // This machine allows unaligned memory accesses + if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { + FLAG_SET_DEFAULT(UseUnalignedAccesses, true); + } + + if (CriticalJNINatives) { + if (FLAG_IS_CMDLINE(CriticalJNINatives)) { + warning("CriticalJNINatives specified, but not supported in this VM"); + } + FLAG_SET_DEFAULT(CriticalJNINatives, false); + } +} + +void VM_Version::initialize() { + ResourceMark rm; + // Making this stub must be FIRST use of assembler + + stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); + if (stub_blob == NULL) { + vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); + } + CodeBuffer c(stub_blob); + VM_Version_StubGenerator g(&c); + get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, + g.generate_get_cpu_info()); + + get_processor_features(); +} diff --git a/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp new file mode 100644 index 00000000000..3b5f907a793 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vm_version_loongarch.hpp @@ -0,0 +1,299 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP + +#include "runtime/globals_extension.hpp" +#include "runtime/vm_version.hpp" + +#ifndef HWCAP_LOONGARCH_LSX +#define HWCAP_LOONGARCH_LSX (1 << 4) +#endif + +#ifndef HWCAP_LOONGARCH_LASX +#define HWCAP_LOONGARCH_LASX (1 << 5) +#endif + +class VM_Version: public Abstract_VM_Version { +public: + + union LoongArch_Cpucfg_Id0 { + uint32_t value; + struct { + uint32_t PRID : 32; + } bits; + }; + + union LoongArch_Cpucfg_Id1 { + uint32_t value; + struct { + uint32_t ARCH : 2, + PGMMU : 1, + IOCSR : 1, + PALEN : 8, + VALEN : 8, + UAL : 1, // unaligned access + RI : 1, + EP : 1, + RPLV : 1, + HP : 1, + IOCSR_BRD : 1, + MSG_INT : 1, + : 5; + } bits; + }; + + union LoongArch_Cpucfg_Id2 { + uint32_t value; + struct { + uint32_t FP_CFG : 1, // FP is used, use FP_CFG instead + FP_SP : 1, + FP_DP : 1, + FP_VER : 3, + LSX : 1, + LASX : 1, + COMPLEX : 1, + CRYPTO : 1, + LVZ : 1, + LVZ_VER : 3, + LLFTP : 1, + LLFTP_VER : 3, + LBT_X86 : 1, + LBT_ARM : 1, + LBT_MIPS : 1, + LSPW : 1, + LAM : 1, + : 9; + } bits; + }; + + union LoongArch_Cpucfg_Id3 { + uint32_t value; + struct { + uint32_t CCDMA : 1, + SFB : 1, + UCACC : 1, + LLEXC : 1, + SCDLY : 1, + LLDBAR : 1, + ITLBHMC : 1, + ICHMC : 1, + SPW_LVL : 3, + SPW_HP_HF : 1, + RVA : 1, + RVAMAXM1 : 4, + : 15; + } bits; + }; + + union LoongArch_Cpucfg_Id4 { + uint32_t value; + struct { + uint32_t CC_FREQ : 32; + } bits; + }; + + union LoongArch_Cpucfg_Id5 { + uint32_t value; + struct { + uint32_t CC_MUL : 16, + CC_DIV : 16; + } bits; + }; + + union LoongArch_Cpucfg_Id6 { + uint32_t value; + struct { + uint32_t PMP : 1, + PMVER : 3, + PMNUM : 4, + PMBITS : 6, + UPM : 1, + : 17; + } bits; + }; + + union LoongArch_Cpucfg_Id10 { + uint32_t value; + struct { + uint32_t L1IU_PRESENT : 1, + L1IU_UNIFY : 1, + L1D_PRESENT : 1, + L2IU_PRESENT : 1, + L2IU_UNIFY : 1, + L2IU_PRIVATE : 1, + L2IU_INCLUSIVE : 1, + L2D_PRESENT : 1, + L2D_PRIVATE : 1, + L2D_INCLUSIVE : 1, + L3IU_PRESENT : 1, + L3IU_UNIFY : 1, + L3IU_PRIVATE : 1, + L3IU_INCLUSIVE : 1, + L3D_PRESENT : 1, + L3D_PRIVATE : 1, + L3D_INCLUSIVE : 1, + : 15; + } bits; + }; + + union LoongArch_Cpucfg_Id11 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + + union LoongArch_Cpucfg_Id12 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + + union LoongArch_Cpucfg_Id13 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + + union LoongArch_Cpucfg_Id14 { + uint32_t value; + struct { + uint32_t WAYM1 : 16, + INDEXMLOG2 : 8, + LINESIZELOG2 : 7, + : 1; + } bits; + }; + +protected: + + enum { + CPU_LAM = (1 << 1), + CPU_UAL = (1 << 2), + CPU_LSX = (1 << 4), + CPU_LASX = (1 << 5), + CPU_COMPLEX = (1 << 7), + CPU_CRYPTO = (1 << 8), + CPU_LBT_X86 = (1 << 10), + CPU_LBT_ARM = (1 << 11), + CPU_LBT_MIPS = (1 << 12), + /* flags above must follow Linux HWCAP */ + CPU_LA32 = (1 << 13), + CPU_LA64 = (1 << 14), + CPU_FP = (1 << 15), + CPU_LLEXC = (1 << 16), + CPU_SCDLY = (1 << 17), + CPU_LLDBAR = (1 << 18), + CPU_CCDMA = (1 << 19), + CPU_LLSYNC = (1 << 20), + CPU_TGTSYNC = (1 << 21), + CPU_ULSYNC = (1 << 22), + + //////////////////////add some other feature here////////////////// + } cpuFeatureFlags; + + static int _cpuFeatures; + static unsigned long auxv; + static const char* _features_str; + static bool _cpu_info_is_initialized; + + struct 
CpuidInfo { + LoongArch_Cpucfg_Id0 cpucfg_info_id0; + LoongArch_Cpucfg_Id1 cpucfg_info_id1; + LoongArch_Cpucfg_Id2 cpucfg_info_id2; + LoongArch_Cpucfg_Id3 cpucfg_info_id3; + LoongArch_Cpucfg_Id4 cpucfg_info_id4; + LoongArch_Cpucfg_Id5 cpucfg_info_id5; + LoongArch_Cpucfg_Id6 cpucfg_info_id6; + LoongArch_Cpucfg_Id10 cpucfg_info_id10; + LoongArch_Cpucfg_Id11 cpucfg_info_id11; + LoongArch_Cpucfg_Id12 cpucfg_info_id12; + LoongArch_Cpucfg_Id13 cpucfg_info_id13; + LoongArch_Cpucfg_Id14 cpucfg_info_id14; + }; + + // The actual cpuid info block + static CpuidInfo _cpuid_info; + + static uint32_t get_feature_flags_by_cpucfg(); + static int get_feature_flags_by_cpuinfo(int features); + static void get_processor_features(); + +public: + // Offsets for cpuid asm stub + static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } + static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } + static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } + static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } + static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } + static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } + static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } + static ByteSize Loongson_Cpucfg_id10_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id10); } + static ByteSize Loongson_Cpucfg_id11_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id11); } + static ByteSize Loongson_Cpucfg_id12_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id12); } + static ByteSize Loongson_Cpucfg_id13_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id13); } + static ByteSize Loongson_Cpucfg_id14_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id14); } + + static void clean_cpuFeatures() { _cpuFeatures = 0; } + + // Initialization + static void initialize(); + + static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } + + static bool is_la32() { return _cpuFeatures & CPU_LA32; } + static bool is_la64() { return _cpuFeatures & CPU_LA64; } + static bool supports_crypto() { return _cpuFeatures & CPU_CRYPTO; } + static bool supports_lsx() { return auxv & HWCAP_LOONGARCH_LSX; } + static bool supports_lasx() { return auxv & HWCAP_LOONGARCH_LASX; } + static bool supports_lam() { return _cpuFeatures & CPU_LAM; } + static bool supports_llexc() { return _cpuFeatures & CPU_LLEXC; } + static bool supports_scdly() { return _cpuFeatures & CPU_SCDLY; } + static bool supports_lldbar() { return _cpuFeatures & CPU_LLDBAR; } + static bool supports_ual() { return _cpuFeatures & CPU_UAL; } + static bool supports_lbt_x86() { return _cpuFeatures & CPU_LBT_X86; } + static bool supports_lbt_arm() { return _cpuFeatures & CPU_LBT_ARM; } + static bool supports_lbt_mips() { return _cpuFeatures & CPU_LBT_MIPS; } + static bool needs_llsync() { return !supports_lldbar(); } + static bool needs_tgtsync() { return 1; } + static bool needs_ulsync() { return 1; } + + static const char* cpu_features() { return _features_str; } +}; + +#endif // CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp new file mode 100644 index 00000000000..52bccfc1834 --- /dev/null +++ 
b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "code/vmreg.hpp" + + + +void VMRegImpl::set_regName() { + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { + regName[i++] = reg->name(); + regName[i++] = reg->name(); + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + regName[i++] = freg->name(); + regName[i++] = freg->name(); + freg = freg->successor(); + } + + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + regName[i] = "NON-GPR-FPR"; + } +} diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp new file mode 100644 index 00000000000..80a1fc57de5 --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP +#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP + +bool is_Register(); +Register as_Register(); + +bool is_FloatRegister(); +FloatRegister as_FloatRegister(); + +#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP diff --git a/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp new file mode 100644 index 00000000000..f822d4c355b --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vmreg_loongarch.inline.hpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP +#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); + return VMRegImpl::as_VMReg(encoding() << 1 ); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); +} + +inline bool VMRegImpl::is_Register() { + return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; +} + +inline bool VMRegImpl::is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + +inline Register VMRegImpl::as_Register() { + + assert( is_Register(), "must be"); + return ::as_Register(value() >> 1); +} + +inline FloatRegister VMRegImpl::as_FloatRegister() { + assert( is_FloatRegister(), "must be" ); + assert( is_even(value()), "must be" ); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); +} + +inline bool VMRegImpl::is_concrete() { + assert(is_reg(), "must be"); + if(is_Register()) return true; + if(is_FloatRegister()) return true; + assert(false, "what register?"); + return false; +} + +#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp b/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp new file mode 100644 index 00000000000..df0d176b8bd --- /dev/null +++ b/hotspot/src/cpu/loongarch/vm/vtableStubs_loongarch_64.cpp @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_loongarch_64.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_loongarch.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + + +// machine-dependent part of VtableStubs: create VtableStub of correct size and +// initialize its code + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, + oop receiver, + int index); +#endif + +// used by the compiler only; receiver in T0. +// used registers: +// Rmethod : receiver klass & method +// NOTE: If this code is used by the C1, the receiver_location is always 0.
+// when reach here, receiver in T0, klass in T8 +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + const int la_code_length = VtableStub::pd_code_size_limit(true); + VtableStub* s = new(la_code_length) VtableStub(true, vtable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), la_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + Register t1 = T8, t2 = Rmethod; +#ifndef PRODUCT + if (CountCompiledCalls) { + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + __ ld_w(t1, AT , 0); + __ addi_w(t1, t1, 1); + __ st_w(t1, AT,0); + } +#endif + + // get receiver (need to skip return address on top of stack) + //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); + + // get receiver klass + address npe_addr = __ pc(); + __ load_klass(t1, T0); + // compute entry offset (in words) + int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size(); +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // check offset vs vtable length + __ ld_w(t2, t1, InstanceKlass::vtable_length_offset()*wordSize); + assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); + __ li(AT, vtable_index*vtableEntry::size()); + __ blt(AT, t2, L); + __ li(A2, vtable_index); + __ move(A1, A0); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); + __ bind(L); + } +#endif // PRODUCT + // load methodOop and target address + const Register method = Rmethod; + int offset = entry_offset*wordSize + vtableEntry::method_offset_in_bytes(); + if (Assembler::is_simm(offset, 12)) { + __ ld_ptr(method, t1, offset); + } else { + __ li(AT, offset); + __ ld_ptr(method, t1, AT); + } + if (DebugVtables) { + Label L; + __ beq(method, R0, L); + __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L); + __ stop("Vtable entry is NULL"); + __ bind(L); + } + // T8: receiver klass + // T0: receiver + // Rmethod: methodOop + // T4: entry + address ame_addr = __ pc(); + __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); + __ jr(T4); + masm->flush(); + s->set_exception_points(npe_addr, ame_addr); + return s; +} + + +// used registers : +// T1 T2 +// when reach here, the receiver in T0, klass in T1 +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Note well: pd_code_size_limit is the absolute minimum we can get + // away with. If you add code here, bump the code stub size + // returned by pd_code_size_limit! 
+ const int la_code_length = VtableStub::pd_code_size_limit(false); + VtableStub* s = new(la_code_length) VtableStub(false, itable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), la_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + // we T8,T4 as temparary register, they are free from register allocator + Register t1 = T8, t2 = T2; + // Entry arguments: + // T1: Interface + // T0: Receiver + +#ifndef PRODUCT + if (CountCompiledCalls) { + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + __ ld_w(T8, AT, 0); + __ addi_w(T8, T8, 1); + __ st_w(T8, AT, 0); + } +#endif /* PRODUCT */ + const Register holder_klass_reg = T1; // declaring interface klass (DECC) + const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) + const Register icholder_reg = T1; + __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); + + // get receiver klass (also an implicit null-check) + address npe_addr = __ pc(); + __ load_klass(t1, T0); + { + // x86 use lookup_interface_method, but lookup_interface_method does not work on LoongArch. + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); + assert(Assembler::is_simm16(base), "change this code"); + __ addi_d(t2, t1, base); + assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); + __ ld_w(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); + __ alsl_d(t2, AT, t2, Address::times_8 - 1); + if (HeapWordsPerLong > 1) { + __ round_to(t2, BytesPerLong); + } + + Label hit, entry; + assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); + __ bind(entry); + +#ifdef ASSERT + // Check that the entry is non-null + if (DebugVtables) { + Label L; + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_w(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, R0, L); + __ stop("null entry point found in itable's offset table"); + __ bind(L); + } +#endif + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); + __ bne(AT, resolved_klass_reg, entry); + + } + + // add for compressedoops + __ load_klass(t1, T0); + // compute itable entry offset (in words) + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); + assert(Assembler::is_simm16(base), "change this code"); + __ addi_d(t2, t1, base); + assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); + __ ld_w(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); + __ alsl_d(t2, AT, t2, Address::times_8 - 1); + if (HeapWordsPerLong > 1) { + __ round_to(t2, BytesPerLong); + } + + Label hit, entry; + assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); + __ bind(entry); + +#ifdef ASSERT + // Check that the entry is non-null + if (DebugVtables) { + Label L; + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_w(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, R0, L); + __ stop("null entry point 
found in itable's offset table"); + __ bind(L); + } +#endif + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ addi_d(t2, t2, itableOffsetEntry::size() * wordSize); + __ bne(AT, holder_klass_reg, entry); + + // We found a hit, move offset into T4 + __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); + + // Compute itableMethodEntry. + const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + + itableMethodEntry::method_offset_in_bytes(); + + // Get methodOop and entrypoint for compiler + const Register method = Rmethod; + + __ slli_d(AT, t2, Address::times_1); + __ add_d(AT, AT, t1 ); + if (Assembler::is_simm(method_offset, 12)) { + __ ld_ptr(method, AT, method_offset); + } else { + __ li(t1, method_offset); + __ ld_ptr(method, AT, t1); + } + +#ifdef ASSERT + if (DebugVtables) { + Label L1; + __ beq(method, R0, L1); + __ ld_d(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L1); + __ stop("methodOop is null"); + __ bind(L1); + } +#endif // ASSERT + + // Rmethod: methodOop + // T0: receiver + // T4: entry point + address ame_addr = __ pc(); + __ ld_ptr(T4, method,in_bytes(Method::from_compiled_offset())); + __ jr(T4); + masm->flush(); + s->set_exception_points(npe_addr, ame_addr); + return s; +} + +// NOTE : whenever you change the code above, dont forget to change the const here +int VtableStub::pd_code_size_limit(bool is_vtable_stub) { + if (is_vtable_stub) { + return ( DebugVtables ? 600 : 28) + (CountCompiledCalls ? 24 : 0)+ + (UseCompressedOops ? 16 : 0); + } else { + return ( DebugVtables ? 636 : 152) + (CountCompiledCalls ? 24 : 0)+ + (UseCompressedOops ? 32 : 0); + } +} + +int VtableStub::pd_code_alignment() { + return wordSize; +} diff --git a/hotspot/src/cpu/mips/vm/assembler_mips.cpp b/hotspot/src/cpu/mips/vm/assembler_mips.cpp new file mode 100644 index 00000000000..6c720972ad6 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/assembler_mips.cpp @@ -0,0 +1,774 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#ifndef PRODUCT +#include "compiler/disassembler.hpp" +#endif +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Implementation of AddressLiteral + +AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { + _is_lval = false; + _target = target; + _rspec = rspec_from_rtype(rtype, target); +} + +// Implementation of Address + +Address Address::make_array(ArrayAddress adr) { + AddressLiteral base = adr.base(); + Address index = adr.index(); + assert(index._disp == 0, "must not have disp"); // maybe it can? + Address array(index._base, index._index, index._scale, (intptr_t) base.target()); + array._rspec = base._rspec; + return array; +} + +// exceedingly dangerous constructor +Address::Address(address loc, RelocationHolder spec) { + _base = noreg; + _index = noreg; + _scale = no_scale; + _disp = (intptr_t) loc; + _rspec = spec; +} + + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of Assembler +const char *Assembler::ops_name[] = { + "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", + "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", + "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", + "daddi", "daddiu", "ldl", "ldr", "", "", "", "", + "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", + "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", + "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", + "sc", "swc1", "", "", "scd", "sdc1", "", "sd" +}; + +const char* Assembler::special_name[] = { + "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", + "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", + "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", + "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", + "add", "addu", "sub", "subu", "and", "or", "xor", "nor", + "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", + "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", + "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" +}; + +const char* Assembler::cop1_name[] = { + "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", + "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", + "c.sf", "c.ngle", 
"c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" +}; + +const char* Assembler::cop1x_name[] = { + "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", + "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", + "", "", "", "", "", "", "alnv.ps", "", + "", "", "", "", "", "", "", "", + "madd.s", "madd.d", "", "", "", "", "madd.ps", "", + "msub.s", "msub.d", "", "", "", "", "msub.ps", "", + "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", + "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" +}; + +const char* Assembler::special2_name[] = { + "madd", "", "mul", "", "msub", "", "", "", + "", "", "", "", "", "", "", "", + "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", + "", "", "", "", "gsmod", "gsdmod", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "" +}; + +const char* Assembler::special3_name[] = { + "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "bshfl", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", +}; + +const char* Assembler::regimm_name[] = { + "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", + "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", + "bltzal", "bgezal", "bltzall", "bgezall" +}; + +const char* Assembler::gs_ldc2_name[] = { + "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" +}; + + +const char* Assembler::gs_lwc2_name[] = { + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", + "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ + "gslq", "" +}; + +const char* Assembler::gs_sdc2_name[] = { + "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" +}; + +const char* Assembler::gs_swc2_name[] = { + "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", + "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", + "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", + "gssq", "" +}; + +//misleading name, print only branch/jump instruction +void Assembler::print_instruction(int inst) { + const char *s; + switch( opcode(inst) ) { + default: + s = ops_name[opcode(inst)]; + break; + case special_op: + s = special_name[special(inst)]; + break; + case regimm_op: + s = special_name[rt(inst)]; + break; + } + + ::tty->print("%s", s); +} + +int Assembler::is_int_mask(int x) { + int xx = x; + int count = 0; + + while (x != 0) { + x &= (x - 1); + count++; + } + + if ((1<>2; + switch(opcode(inst)) { + case j_op: + case jal_op: + case lui_op: + case ori_op: + case daddiu_op: + ShouldNotReachHere(); + break; + default: + assert(is_simm16(v), "must be simm16"); +#ifndef PRODUCT + if(!is_simm16(v)) + { + tty->print_cr("must be simm16"); + tty->print_cr("Inst: %x", inst); + } +#endif + + v = low16(v); + inst &= 0xffff0000; + break; + } + + return inst | v; +} + +int Assembler::branch_destination(int inst, int pos) { + int off; + + switch(opcode(inst)) { + case j_op: + case jal_op: + assert(false, "should not use j/jal here"); + break; + default: + off = expand(low16(inst), 15); + break; + } + + return off ? 
pos + 4 + (off<<2) : 0; +} + +int AbstractAssembler::code_fill_byte() { + return 0x00; // illegal instruction 0x00000000 +} + +// Now the Assembler instruction (identical for 32/64 bits) + +void Assembler::lb(Register rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + lb(rt, src.base(), src.disp()); +} + +void Assembler::lbu(Register rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + lbu(rt, src.base(), src.disp()); +} + +void Assembler::ld(Register rt, Address dst){ + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (Assembler::is_simm16(disp)) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + gsldx(src, base, index, disp); + } else { + dsll(AT, index, scale); + gsldx(src, base, AT, disp); + } + } else { + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + ld(src, AT, disp); + } + } else { + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gsldx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + ld(src, AT, 0); + } + } else { + assert_different_registers(src, AT); + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(src, split_low(disp >> 16)); + if (split_low(disp)) ori(src, src, split_low(disp)); + if (UseLEXT1) { + gsldx(src, AT, src, 0); + } else { + daddu(AT, AT, src); + ld(src, AT, 0); + } + } + } + } else { + if (Assembler::is_simm16(disp)) { + ld(src, base, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gsldx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + ld(src, AT, 0); + } + } + } +} + +void Assembler::ldl(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ldl(rt, src.base(), src.disp()); +} + +void Assembler::ldr(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ldr(rt, src.base(), src.disp()); +} + +void Assembler::lh(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lh(rt, src.base(), src.disp()); +} + +void Assembler::lhu(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lhu(rt, src.base(), src.disp()); +} + +void Assembler::ll(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + ll(rt, src.base(), src.disp()); +} + +void Assembler::lld(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lld(rt, src.base(), src.disp()); +} + +void Assembler::lw(Register rt, Address dst){ + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (Assembler::is_simm16(disp)) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + gslwx(src, base, index, disp); + } else { + dsll(AT, index, scale); + gslwx(src, base, AT, disp); + } + } else { + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + lw(src, AT, disp); + } + } else { + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gslwx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + lw(src, AT, 0); + } 
+ } else { + assert_different_registers(src, AT); + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(src, split_low(disp >> 16)); + if (split_low(disp)) ori(src, src, split_low(disp)); + if (UseLEXT1) { + gslwx(src, AT, src, 0); + } else { + daddu(AT, AT, src); + lw(src, AT, 0); + } + } + } + } else { + if (Assembler::is_simm16(disp)) { + lw(src, base, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gslwx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + lw(src, AT, 0); + } + } + } +} + +void Assembler::lea(Register rt, Address src) { + Register dst = rt; + Register base = src.base(); + Register index = src.index(); + + int scale = src.scale(); + int disp = src.disp(); + + if (index == noreg) { + if (is_simm16(disp)) { + daddiu(dst, base, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(dst, base, AT); + } + } else { + if (scale == 0) { + if (is_simm16(disp)) { + daddu(AT, base, index); + daddiu(dst, AT, disp); + } else { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, base, AT); + daddu(dst, AT, index); + } + } else { + if (is_simm16(disp)) { + dsll(AT, index, scale); + daddu(AT, AT, base); + daddiu(dst, AT, disp); + } else { + assert_different_registers(dst, AT); + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + dsll(dst, index, scale); + daddu(dst, dst, AT); + } + } + } +} + +void Assembler::lwl(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lwl(rt, src.base(), src.disp()); +} + +void Assembler::lwr(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lwr(rt, src.base(), src.disp()); +} + +void Assembler::lwu(Register rt, Address src){ + assert(src.index() == NOREG, "index is unimplemented"); + lwu(rt, src.base(), src.disp()); +} + +void Assembler::sb(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sb(rt, dst.base(), dst.disp()); +} + +void Assembler::sc(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sc(rt, dst.base(), dst.disp()); +} + +void Assembler::scd(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + scd(rt, dst.base(), dst.disp()); +} + +void Assembler::sd(Register rt, Address dst) { + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if (is_simm16(disp)) { + if ( UseLEXT1 && is_simm(disp, 8)) { + if (scale == 0) { + gssdx(src, base, index, disp); + } else { + assert_different_registers(rt, AT); + dsll(AT, index, scale); + gssdx(src, base, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + sd(src, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gssdx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + sd(src, AT, 0); + } + } else { + daddiu(SP, SP, -wordSize); + sd(T9, SP, 0); + + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(T9, split_low(disp >> 16)); + if (split_low(disp)) ori(T9, T9, split_low(disp)); + daddu(AT, AT, 
T9); + ld(T9, SP, 0); + daddiu(SP, SP, wordSize); + sd(src, AT, 0); + } + } + } else { + if (is_simm16(disp)) { + sd(src, base, disp); + } else { + assert_different_registers(rt, AT); + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gssdx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + sd(src, AT, 0); + } + } + } +} + +void Assembler::sdl(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sdl(rt, dst.base(), dst.disp()); +} + +void Assembler::sdr(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sdr(rt, dst.base(), dst.disp()); +} + +void Assembler::sh(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sh(rt, dst.base(), dst.disp()); +} + +void Assembler::sw(Register rt, Address dst) { + Register src = rt; + Register base = dst.base(); + Register index = dst.index(); + + int scale = dst.scale(); + int disp = dst.disp(); + + if (index != noreg) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + gsswx(src, base, index, disp); + } else { + assert_different_registers(rt, AT); + dsll(AT, index, scale); + gsswx(src, base, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + daddu(AT, base, index); + } else { + dsll(AT, index, scale); + daddu(AT, base, AT); + } + sw(src, AT, disp); + } + } else { + assert_different_registers(rt, AT); + if (scale == 0) { + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + daddu(AT, AT, base); + if (UseLEXT1) { + gsswx(src, AT, index, 0); + } else { + daddu(AT, AT, index); + sw(src, AT, 0); + } + } else { + daddiu(SP, SP, -wordSize); + sd(T9, SP, 0); + + dsll(AT, index, scale); + daddu(AT, base, AT); + lui(T9, split_low(disp >> 16)); + if (split_low(disp)) ori(T9, T9, split_low(disp)); + daddu(AT, AT, T9); + ld(T9, SP, 0); + daddiu(SP, SP, wordSize); + sw(src, AT, 0); + } + } + } else { + if (Assembler::is_simm16(disp)) { + sw(src, base, disp); + } else { + assert_different_registers(rt, AT); + lui(AT, split_low(disp >> 16)); + if (split_low(disp)) ori(AT, AT, split_low(disp)); + + if (UseLEXT1) { + gsswx(src, base, AT, 0); + } else { + daddu(AT, base, AT); + sw(src, AT, 0); + } + } + } +} + +void Assembler::swl(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + swl(rt, dst.base(), dst.disp()); +} + +void Assembler::swr(Register rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + swr(rt, dst.base(), dst.disp()); +} + +void Assembler::lwc1(FloatRegister rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + lwc1(rt, src.base(), src.disp()); +} + +void Assembler::ldc1(FloatRegister rt, Address src) { + assert(src.index() == NOREG, "index is unimplemented"); + ldc1(rt, src.base(), src.disp()); +} + +void Assembler::swc1(FloatRegister rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + swc1(rt, dst.base(), dst.disp()); +} + +void Assembler::sdc1(FloatRegister rt, Address dst) { + assert(dst.index() == NOREG, "index is unimplemented"); + sdc1(rt, dst.base(), dst.disp()); +} + +void Assembler::j(address entry) { + int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; + emit_long((j_op<<26) | dest); + has_delay_slot(); +} + +void Assembler::jal(address entry) { + int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; + emit_long((jal_op<<26) | dest); + 
has_delay_slot(); +} + +void Assembler::emit_long(int x) { // shadows AbstractAssembler::emit_long + check_delay(); + AbstractAssembler::emit_int32(x); +} + +inline void Assembler::emit_data(int x) { emit_long(x); } +inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { + relocate(rtype); + emit_long(x); +} + +inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { + relocate(rspec); + emit_long(x); +} + +inline void Assembler::check_delay() { +#ifdef CHECK_DELAY + guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); + delay_state = no_delay; +#endif +} diff --git a/hotspot/src/cpu/mips/vm/assembler_mips.hpp b/hotspot/src/cpu/mips/vm/assembler_mips.hpp new file mode 100644 index 00000000000..e91b9db222b --- /dev/null +++ b/hotspot/src/cpu/mips/vm/assembler_mips.hpp @@ -0,0 +1,1789 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP + +#include "asm/register.hpp" + +class BiasedLockingCounters; + + +// Note: A register location is represented via a Register, not +// via an address for efficiency & simplicity reasons. + +class ArrayAddress; + +class Address VALUE_OBJ_CLASS_SPEC { + public: + enum ScaleFactor { + no_scale = -1, + times_1 = 0, + times_2 = 1, + times_4 = 2, + times_8 = 3, + times_ptr = times_8 + }; + static ScaleFactor times(int size) { + assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); + if (size == 8) return times_8; + if (size == 4) return times_4; + if (size == 2) return times_2; + return times_1; + } + + private: + Register _base; + Register _index; + ScaleFactor _scale; + int _disp; + RelocationHolder _rspec; + + // Easily misused constructors make them private + Address(address loc, RelocationHolder spec); + Address(int disp, address loc, relocInfo::relocType rtype); + Address(int disp, address loc, RelocationHolder spec); + + public: + + // creation + Address() + : _base(noreg), + _index(noreg), + _scale(no_scale), + _disp(0) { + } + + // No default displacement otherwise Register can be implicitly + // converted to 0(Register) which is quite a different animal. 
+ + Address(Register base, int disp = 0) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(disp) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, int disp = 0) + : _base (base), + _index(index), + _scale(scale), + _disp (disp) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } + + // The following two overloads are used in connection with the + // ByteSize type (see sizes.hpp). They simplify the use of + // ByteSize'd arguments in assembly code. Note that their equivalent + // for the optimized build are the member functions with int disp + // argument since ByteSize is mapped to an int type in that case. + // + // Note: DO NOT introduce similar overloaded functions for WordSize + // arguments as in the optimized mode, both ByteSize and WordSize + // are mapped to the same type and thus the compiler cannot make a + // distinction anymore (=> compiler errors). + +#ifdef ASSERT + Address(Register base, ByteSize disp) + : _base(base), + _index(noreg), + _scale(no_scale), + _disp(in_bytes(disp)) { + assert_different_registers(_base, AT); + } + + Address(Register base, Register index, ScaleFactor scale, ByteSize disp) + : _base(base), + _index(index), + _scale(scale), + _disp(in_bytes(disp)) { + assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); + assert_different_registers(_base, _index, AT); + } +#endif // ASSERT + + // accessors + bool uses(Register reg) const { return _base == reg || _index == reg; } + Register base() const { return _base; } + Register index() const { return _index; } + ScaleFactor scale() const { return _scale; } + int disp() const { return _disp; } + + static Address make_array(ArrayAddress); + + friend class Assembler; + friend class MacroAssembler; + friend class LIR_Assembler; // base/index/scale/disp +}; + +// Calling convention +class Argument VALUE_OBJ_CLASS_SPEC { + private: + int _number; + public: + enum { + n_register_parameters = 8, // 8 integer registers used to pass parameters + n_float_register_parameters = 8 // 8 float registers used to pass parameters + }; + + Argument(int number):_number(number){ } + Argument successor() {return Argument(number() + 1);} + + int number()const {return _number;} + bool is_Register()const {return _number < n_register_parameters;} + bool is_FloatRegister()const {return _number < n_float_register_parameters;} + + Register as_Register()const { + assert(is_Register(), "must be a register argument"); + return ::as_Register(RA0->encoding() + _number); + } + FloatRegister as_FloatRegister()const { + assert(is_FloatRegister(), "must be a float register argument"); + return ::as_FloatRegister(F12->encoding() + _number); + } + + Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} +}; + +// +// AddressLiteral has been split out from Address because operands of this type +// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out +// the few instructions that need to deal with address literals are unique and the +// MacroAssembler does not have to implement every instruction in the Assembler +// in order to search for address literals that may need special handling depending +// on the instruction and the platform. As small step on the way to merging i486/amd64 +// directories. 
+// +class AddressLiteral VALUE_OBJ_CLASS_SPEC { + friend class ArrayAddress; + RelocationHolder _rspec; + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of the item. + // We provide a special factory for making those lvals. + bool _is_lval; + + // If the target is far we'll need to load the ea of this to + // a register to reach it. Otherwise if near we can do rip + // relative addressing. + + address _target; + + protected: + // creation + AddressLiteral() + : _is_lval(false), + _target(NULL) + {} + + public: + + + AddressLiteral(address target, relocInfo::relocType rtype); + + AddressLiteral(address target, RelocationHolder const& rspec) + : _rspec(rspec), + _is_lval(false), + _target(target) + {} + // 32-bit complains about a multiple declaration for int*. + AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) + : _target((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral addr() { + AddressLiteral ret = *this; + ret._is_lval = true; + return ret; + } + + + private: + + address target() { return _target; } + bool is_lval() { return _is_lval; } + + relocInfo::relocType reloc() const { return _rspec.type(); } + const RelocationHolder& rspec() const { return _rspec; } + + friend class Assembler; + friend class MacroAssembler; + friend class Address; + friend class LIR_Assembler; + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); + case relocInfo::poll_type: + case relocInfo::poll_return_type: + return Relocation::spec_simple(rtype); + case relocInfo::none: + case relocInfo::oop_type: + // Oops are a special case. Normally they would be their own section + // but in cases like icBuffer they are literals in the code stream that + // we don't have a section for. We use none so that we get a literal address + // which is always patchable. + return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + +}; + +// Convience classes +class RuntimeAddress: public AddressLiteral { + + public: + + RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} + +}; + +class OopAddress: public AddressLiteral { + + public: + + OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} + +}; + +class ExternalAddress: public AddressLiteral { + + public: + + ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} + +}; + +class InternalAddress: public AddressLiteral { + + public: + + InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} + +}; + +// x86 can do array addressing as a single operation since disp can be an absolute +// address amd64 can't. 
We create a class that expresses the concept but does extra +// magic on amd64 to get the final result + +class ArrayAddress VALUE_OBJ_CLASS_SPEC { + private: + + AddressLiteral _base; + Address _index; + + public: + + ArrayAddress() {}; + ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; + AddressLiteral base() { return _base; } + Address index() { return _index; } + +}; + +const int FPUStateSizeInWords = 512 / wordSize; + +// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction +// level ; i.e., what you write is what you get. The Assembler is generating code into +// a CodeBuffer. + +class Assembler : public AbstractAssembler { + friend class AbstractAssembler; // for the non-virtual hack + friend class LIR_Assembler; // as_Address() + friend class StubGenerator; + + public: + enum Condition { + zero , + notZero , + equal , + notEqual , + less , + lessEqual , + greater , + greaterEqual , + below , + belowEqual , + above , + aboveEqual + }; + + static const int LogInstructionSize = 2; + static const int InstructionSize = 1 << LogInstructionSize; + + // opcode, highest 6 bits: bits[31...26] + enum ops { + special_op = 0x00, // special_ops + regimm_op = 0x01, // regimm_ops + j_op = 0x02, + jal_op = 0x03, + beq_op = 0x04, + bne_op = 0x05, + blez_op = 0x06, + bgtz_op = 0x07, + addiu_op = 0x09, + slti_op = 0x0a, + sltiu_op = 0x0b, + andi_op = 0x0c, + ori_op = 0x0d, + xori_op = 0x0e, + lui_op = 0x0f, + cop0_op = 0x10, // cop0_ops + cop1_op = 0x11, // cop1_ops + gs_cop2_op = 0x12, // gs_cop2_ops + cop1x_op = 0x13, // cop1x_ops + beql_op = 0x14, + bnel_op = 0x15, + blezl_op = 0x16, + bgtzl_op = 0x17, + daddiu_op = 0x19, + ldl_op = 0x1a, + ldr_op = 0x1b, + special2_op = 0x1c, // special2_ops + msa_op = 0x1e, // msa_ops + special3_op = 0x1f, // special3_ops + lb_op = 0x20, + lh_op = 0x21, + lwl_op = 0x22, + lw_op = 0x23, + lbu_op = 0x24, + lhu_op = 0x25, + lwr_op = 0x26, + lwu_op = 0x27, + sb_op = 0x28, + sh_op = 0x29, + swl_op = 0x2a, + sw_op = 0x2b, + sdl_op = 0x2c, + sdr_op = 0x2d, + swr_op = 0x2e, + cache_op = 0x2f, + ll_op = 0x30, + lwc1_op = 0x31, + gs_lwc2_op = 0x32, //gs_lwc2_ops + pref_op = 0x33, + lld_op = 0x34, + ldc1_op = 0x35, + gs_ldc2_op = 0x36, //gs_ldc2_ops + ld_op = 0x37, + sc_op = 0x38, + swc1_op = 0x39, + gs_swc2_op = 0x3a, //gs_swc2_ops + scd_op = 0x3c, + sdc1_op = 0x3d, + gs_sdc2_op = 0x3e, //gs_sdc2_ops + sd_op = 0x3f + }; + + static const char *ops_name[]; + + //special family, the opcode is in low 6 bits. 
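+  // (the major opcode bits[31..26] are special_op == 0 and the low 6 bits hold
+  //  the function code; e.g. "daddu rd, rs, rt" is emitted by daddu() below via
+  //  insn_RRRO() with daddu_op as the function code)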
+ enum special_ops { + sll_op = 0x00, + movci_op = 0x01, + srl_op = 0x02, + sra_op = 0x03, + sllv_op = 0x04, + srlv_op = 0x06, + srav_op = 0x07, + jr_op = 0x08, + jalr_op = 0x09, + movz_op = 0x0a, + movn_op = 0x0b, + syscall_op = 0x0c, + break_op = 0x0d, + sync_op = 0x0f, + mfhi_op = 0x10, + mthi_op = 0x11, + mflo_op = 0x12, + mtlo_op = 0x13, + dsllv_op = 0x14, + dsrlv_op = 0x16, + dsrav_op = 0x17, + mult_op = 0x18, + multu_op = 0x19, + div_op = 0x1a, + divu_op = 0x1b, + dmult_op = 0x1c, + dmultu_op = 0x1d, + ddiv_op = 0x1e, + ddivu_op = 0x1f, + addu_op = 0x21, + subu_op = 0x23, + and_op = 0x24, + or_op = 0x25, + xor_op = 0x26, + nor_op = 0x27, + slt_op = 0x2a, + sltu_op = 0x2b, + daddu_op = 0x2d, + dsubu_op = 0x2f, + tge_op = 0x30, + tgeu_op = 0x31, + tlt_op = 0x32, + tltu_op = 0x33, + teq_op = 0x34, + tne_op = 0x36, + dsll_op = 0x38, + dsrl_op = 0x3a, + dsra_op = 0x3b, + dsll32_op = 0x3c, + dsrl32_op = 0x3e, + dsra32_op = 0x3f + }; + + static const char* special_name[]; + + //regimm family, the opcode is in rt[16...20], 5 bits + enum regimm_ops { + bltz_op = 0x00, + bgez_op = 0x01, + bltzl_op = 0x02, + bgezl_op = 0x03, + tgei_op = 0x08, + tgeiu_op = 0x09, + tlti_op = 0x0a, + tltiu_op = 0x0b, + teqi_op = 0x0c, + tnei_op = 0x0e, + bltzal_op = 0x10, + bgezal_op = 0x11, + bltzall_op = 0x12, + bgezall_op = 0x13, + bposge32_op = 0x1c, + bposge64_op = 0x1d, + synci_op = 0x1f, + }; + + static const char* regimm_name[]; + + //cop0 family, the ops is in bits[25...21], 5 bits + enum cop0_ops { + mfc0_op = 0x00, + dmfc0_op = 0x01, + // + mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 + mtc0_op = 0x04, + dmtc0_op = 0x05, + rdpgpr_op = 0x0a, + inter_op = 0x0b, + wrpgpr_op = 0x0c + }; + + //cop1 family, the ops is in bits[25...21], 5 bits + enum cop1_ops { + mfc1_op = 0x00, + dmfc1_op = 0x01, + cfc1_op = 0x02, + mfhc1_op = 0x03, + mtc1_op = 0x04, + dmtc1_op = 0x05, + ctc1_op = 0x06, + mthc1_op = 0x07, + bc1f_op = 0x08, + single_fmt = 0x10, + double_fmt = 0x11, + word_fmt = 0x14, + long_fmt = 0x15, + ps_fmt = 0x16 + }; + + + //2 bist (bits[17...16]) of bc1x instructions (cop1) + enum bc_ops { + bcf_op = 0x0, + bct_op = 0x1, + bcfl_op = 0x2, + bctl_op = 0x3, + }; + + // low 6 bits of c_x_fmt instructions (cop1) + enum c_conds { + f_cond = 0x30, + un_cond = 0x31, + eq_cond = 0x32, + ueq_cond = 0x33, + olt_cond = 0x34, + ult_cond = 0x35, + ole_cond = 0x36, + ule_cond = 0x37, + sf_cond = 0x38, + ngle_cond = 0x39, + seq_cond = 0x3a, + ngl_cond = 0x3b, + lt_cond = 0x3c, + nge_cond = 0x3d, + le_cond = 0x3e, + ngt_cond = 0x3f + }; + + // low 6 bits of cop1 instructions + enum float_ops { + fadd_op = 0x00, + fsub_op = 0x01, + fmul_op = 0x02, + fdiv_op = 0x03, + fsqrt_op = 0x04, + fabs_op = 0x05, + fmov_op = 0x06, + fneg_op = 0x07, + froundl_op = 0x08, + ftruncl_op = 0x09, + fceill_op = 0x0a, + ffloorl_op = 0x0b, + froundw_op = 0x0c, + ftruncw_op = 0x0d, + fceilw_op = 0x0e, + ffloorw_op = 0x0f, + movf_f_op = 0x11, + movt_f_op = 0x11, + movz_f_op = 0x12, + movn_f_op = 0x13, + frecip_op = 0x15, + frsqrt_op = 0x16, + fcvts_op = 0x20, + fcvtd_op = 0x21, + fcvtw_op = 0x24, + fcvtl_op = 0x25, + fcvtps_op = 0x26, + fcvtspl_op = 0x28, + fpll_op = 0x2c, + fplu_op = 0x2d, + fpul_op = 0x2e, + fpuu_op = 0x2f + }; + + static const char* cop1_name[]; + + //cop1x family, the opcode is in low 6 bits. 
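+  // (COP1X holds the indexed FP loads/stores such as lwxc1/ldxc1 and the fused
+  //  multiply-add/subtract forms such as madd_s/madd_d; their fields are packed
+  //  by insn_F3ROX() below)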
+ enum cop1x_ops { + lwxc1_op = 0x00, + ldxc1_op = 0x01, + luxc1_op = 0x05, + swxc1_op = 0x08, + sdxc1_op = 0x09, + suxc1_op = 0x0d, + prefx_op = 0x0f, + + alnv_ps_op = 0x1e, + madd_s_op = 0x20, + madd_d_op = 0x21, + madd_ps_op = 0x26, + msub_s_op = 0x28, + msub_d_op = 0x29, + msub_ps_op = 0x2e, + nmadd_s_op = 0x30, + nmadd_d_op = 0x31, + nmadd_ps_op = 0x36, + nmsub_s_op = 0x38, + nmsub_d_op = 0x39, + nmsub_ps_op = 0x3e + }; + + static const char* cop1x_name[]; + + //special2 family, the opcode is in low 6 bits. + enum special2_ops { + madd_op = 0x00, + maddu_op = 0x01, + mul_op = 0x02, + gs0x03_op = 0x03, + msub_op = 0x04, + msubu_op = 0x05, + gs0x06_op = 0x06, + gsemul2_op = 0x07, + gsemul3_op = 0x08, + gsemul4_op = 0x09, + gsemul5_op = 0x0a, + gsemul6_op = 0x0b, + gsemul7_op = 0x0c, + gsemul8_op = 0x0d, + gsemul9_op = 0x0e, + gsemul10_op = 0x0f, + gsmult_op = 0x10, + gsdmult_op = 0x11, + gsmultu_op = 0x12, + gsdmultu_op = 0x13, + gsdiv_op = 0x14, + gsddiv_op = 0x15, + gsdivu_op = 0x16, + gsddivu_op = 0x17, + gsmod_op = 0x1c, + gsdmod_op = 0x1d, + gsmodu_op = 0x1e, + gsdmodu_op = 0x1f, + clz_op = 0x20, + clo_op = 0x21, + xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX + gsrxr_x_op = 0x23, //gsX + dclz_op = 0x24, + dclo_op = 0x25, + gsle_op = 0x26, + gsgt_op = 0x27, + gs86j_op = 0x28, + gsloop_op = 0x29, + gsaj_op = 0x2a, + gsldpc_op = 0x2b, + gs86set_op = 0x30, + gstm_op = 0x31, + gscvt_ld_op = 0x32, + gscvt_ud_op = 0x33, + gseflag_op = 0x34, + gscam_op = 0x35, + gstop_op = 0x36, + gssettag_op = 0x37, + gssdbbp_op = 0x38 + }; + + static const char* special2_name[]; + + // special3 family, the opcode is in low 6 bits. + enum special3_ops { + ext_op = 0x00, + dextm_op = 0x01, + dextu_op = 0x02, + dext_op = 0x03, + ins_op = 0x04, + dinsm_op = 0x05, + dinsu_op = 0x06, + dins_op = 0x07, + lxx_op = 0x0a, //lwx, lhx, lbux, ldx + insv_op = 0x0c, + dinsv_op = 0x0d, + ar1_op = 0x10, //MIPS DSP + cmp1_op = 0x11, //MIPS DSP + re1_op = 0x12, //MIPS DSP, re1_ops + sh1_op = 0x13, //MIPS DSP + ar2_op = 0x14, //MIPS DSP + cmp2_op = 0x15, //MIPS DSP + re2_op = 0x16, //MIPS DSP, re2_ops + sh2_op = 0x17, //MIPS DSP + ar3_op = 0x18, //MIPS DSP + bshfl_op = 0x20 //seb, seh + }; + + // re1_ops + enum re1_ops { + absq_s_qb_op = 0x01, + repl_qb_op = 0x02, + replv_qb_op = 0x03, + absq_s_ph_op = 0x09, + repl_ph_op = 0x0a, + replv_ph_op = 0x0b, + absq_s_w_op = 0x11, + bitrev_op = 0x1b + }; + + // re2_ops + enum re2_ops { + repl_ob_op = 0x02, + replv_ob_op = 0x03, + absq_s_qh_op = 0x09, + repl_qh_op = 0x0a, + replv_qh_op = 0x0b, + absq_s_pw_op = 0x11, + repl_pw_op = 0x12, + replv_pw_op = 0x13, + }; + + static const char* special3_name[]; + + // lwc2/gs_lwc2 family, the opcode is in low 6 bits. + enum gs_lwc2_ops { + gslble_op = 0x10, + gslbgt_op = 0x11, + gslhle_op = 0x12, + gslhgt_op = 0x13, + gslwle_op = 0x14, + gslwgt_op = 0x15, + gsldle_op = 0x16, + gsldgt_op = 0x17, + gslwlec1_op = 0x1c, + gslwgtc1_op = 0x1d, + gsldlec1_op = 0x1e, + gsldgtc1_op = 0x1f, + gslq_op = 0x20 + }; + + static const char* gs_lwc2_name[]; + + // ldc2/gs_ldc2 family, the opcode is in low 3 bits. + enum gs_ldc2_ops { + gslbx_op = 0x0, + gslhx_op = 0x1, + gslwx_op = 0x2, + gsldx_op = 0x3, + gslwxc1_op = 0x6, + gsldxc1_op = 0x7 + }; + + static const char* gs_ldc2_name[]; + + // swc2/gs_swc2 family, the opcode is in low 6 bits. 
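+  // (these store forms mirror the gs_lwc2 load forms above, reusing the same
+  //  low-6-bit function codes for the store direction)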
+ enum gs_swc2_ops { + gssble_op = 0x10, + gssbgt_op = 0x11, + gsshle_op = 0x12, + gsshgt_op = 0x13, + gsswle_op = 0x14, + gsswgt_op = 0x15, + gssdle_op = 0x16, + gssdgt_op = 0x17, + gsswlec1_op = 0x1c, + gsswgtc1_op = 0x1d, + gssdlec1_op = 0x1e, + gssdgtc1_op = 0x1f, + gssq_op = 0x20 + }; + + static const char* gs_swc2_name[]; + + // sdc2/gs_sdc2 family, the opcode is in low 3 bits. + enum gs_sdc2_ops { + gssbx_op = 0x0, + gsshx_op = 0x1, + gsswx_op = 0x2, + gssdx_op = 0x3, + gsswxc1_op = 0x6, + gssdxc1_op = 0x7 + }; + + static const char* gs_sdc2_name[]; + + enum WhichOperand { + // input to locate_operand, and format code for relocations + imm_operand = 0, // embedded 32-bit|64-bit immediate operand + disp32_operand = 1, // embedded 32-bit displacement or address + call32_operand = 2, // embedded 32-bit self-relative displacement + narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop + _WhichOperand_limit = 4 + }; + + static int opcode(int insn) { return (insn>>26)&0x3f; } + static int rs(int insn) { return (insn>>21)&0x1f; } + static int rt(int insn) { return (insn>>16)&0x1f; } + static int rd(int insn) { return (insn>>11)&0x1f; } + static int sa(int insn) { return (insn>>6)&0x1f; } + static int special(int insn) { return insn&0x3f; } + static int imm_off(int insn) { return (short)low16(insn); } + + static int low (int x, int l) { return bitfield(x, 0, l); } + static int low16(int x) { return low(x, 16); } + static int low26(int x) { return low(x, 26); } + + protected: + //help methods for instruction ejection + + // I-Type (Immediate) + // 31 26 25 21 20 16 15 0 + //| opcode | rs | rt | immediat | + //| | | | | + // 6 5 5 16 + static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); } + + // R-Type (Register) + // 31 26 25 21 20 16 15 11 10 6 5 0 + //| special | rs | rt | rd | 0 | opcode | + //| 0 0 0 0 0 0 | | | | 0 0 0 0 0 | | + // 6 5 5 5 5 6 + static int insn_RRRO(int rs, int rt, int rd, int op) { return (rs<<21) | (rt<<16) | (rd<<11) | op; } + static int insn_RRSO(int rt, int rd, int sa, int op) { return (rt<<16) | (rd<<11) | (sa<<6) | op; } + static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; } + + static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); } + static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); } + + static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) { + return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; + } + static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) { + return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func; + } + + static int high (int x, int l) { return bitfield(x, 32-l, l); } + static int high16(int x) { return high(x, 16); } + static int high6 (int x) { return high(x, 6); } + + //get the offset field of jump/branch instruction + int offset(address entry) { + assert(is_simm16((entry - pc() - 4) / 4), "change this code"); + if (!is_simm16((entry - pc() - 4) / 4)) { + tty->print_cr("!!! 
is_simm16: %lx", (entry - pc() - 4) / 4); + } + return (entry - pc() - 4) / 4; + } + + +public: + using AbstractAssembler::offset; + + //sign expand with the sign bit is h + static int expand(int x, int h) { return -(x & (1<> 16; + } + + static int split_high(int x) { + return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff; + } + + static int merge(int low, int high) { + return expand(low, 15) + (high<<16); + } + + static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) { + return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0; + } + + // Test if x is within signed immediate range for nbits. + static bool is_simm (int x, int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int min = -( ((int)1) << nbits-1 ); + const int maxplus1 = ( ((int)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + static bool is_simm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong min = -( ((jlong)1) << nbits-1 ); + const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + // Test if x is within unsigned immediate range for nbits + static bool is_uimm(int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int maxplus1 = ( ((int)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + static bool is_uimm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong maxplus1 = ( ((jlong)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + static bool is_simm16(int x) { return is_simm(x, 16); } + static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); } + + static bool fit_in_jal(address target, address pc) { + intptr_t mask = 0xfffffffff0000000; + return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask); + } + + bool fit_int_branch(address entry) { + return is_simm16(offset(entry)); + } + +protected: +#ifdef ASSERT + #define CHECK_DELAY +#endif +#ifdef CHECK_DELAY + enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state; +#endif + +public: + void assert_not_delayed() { +#ifdef CHECK_DELAY + assert_not_delayed("next instruction should not be a delay slot"); +#endif + } + + void assert_not_delayed(const char* msg) { +#ifdef CHECK_DELAY + assert(delay_state == no_delay, msg); +#endif + } + +protected: + // Delay slot helpers + // cti is called when emitting control-transfer instruction, + // BEFORE doing the emitting. + // Only effective when assertion-checking is enabled. + + // called when emitting cti with a delay slot, AFTER emitting + void has_delay_slot() { +#ifdef CHECK_DELAY + assert_not_delayed("just checking"); + delay_state = at_delay_slot; +#endif + } + +public: + Assembler* delayed() { +#ifdef CHECK_DELAY + guarantee( delay_state == at_delay_slot, "delayed instructition is not in delay slot"); + delay_state = filling_delay_slot; +#endif + return this; + } + + void flush() { +#ifdef CHECK_DELAY + guarantee( delay_state == no_delay, "ending code with a delay slot"); +#endif + AbstractAssembler::flush(); + } + + void emit_long(int); // shadows AbstractAssembler::emit_long + void emit_data(int); + void emit_data(int, RelocationHolder const&); + void emit_data(int, relocInfo::relocType rtype); + void check_delay(); + + + // Generic instructions + // Does 32bit or 64bit as needed for the platform. 
In some sense these + // belong in macro assembler but there is no need for both varieties to exist + + void addu32(Register rd, Register rs, Register rt){ emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); } + void addiu32(Register rt, Register rs, int imm) { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void addiu(Register rt, Register rs, int imm) { daddiu (rt, rs, imm);} + void addu(Register rd, Register rs, Register rt) { daddu (rd, rs, rt); } + + void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); } + void andi(Register rt, Register rs, int imm) { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } + + void beq (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + void beql (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + void bgez (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op, off)); has_delay_slot(); } + void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op, off)); has_delay_slot(); } + void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); } + void bgezl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op, off)); has_delay_slot(); } + void bgtz (Register rs, int off) { emit_long(insn_ORRI(bgtz_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void bgtzl (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void blez (Register rs, int off) { emit_long(insn_ORRI(blez_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void blezl (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); } + void bltz (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op, off)); has_delay_slot(); } + void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op, off)); has_delay_slot(); } + void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); } + void bltzl (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op, off)); has_delay_slot(); } + void bne (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + void bnel (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); } + // two versions of brk: + // the brk(code) version is according to MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set + // the brk(code1, code2) is according to disassembler of hsdis (binutils-2.27) + // both versions work + void brk (int code) { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); } + void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); } + + void beq (Register rs, Register rt, address entry) { beq(rs, rt, offset(entry)); } + void beql (Register 
rs, Register rt, address entry) { beql(rs, rt, offset(entry));} + void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } + void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } + void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } + void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } + void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } + void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } + void blez (Register rs, address entry) { blez (rs, offset(entry)); } + void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } + void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } + void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } + void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } + void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } + void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } + void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } + + void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } + void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } + void bgez (Register rs, Label& L){ bgez (rs, target(L)); } + void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } + void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } + void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } + void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } + void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } + void blez (Register rs, Label& L){ blez (rs, target(L)); } + void blezl (Register rs, Label& L){ blezl (rs, target(L)); } + void bltz (Register rs, Label& L){ bltz (rs, target(L)); } + void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } + void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } + void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } + void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } + void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } + + void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } + void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } + void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } + + void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } + void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } + + void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } + void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } + + enum bshfl_ops { + seb_op = 0x10, + seh_op = 0x18 + }; + void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op << 6) | bshfl_op); } + void seh (Register rd, Register rt) { emit_long((special3_op << 
26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } + + void ext (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); + guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); + + int lsb = pos; + int msbd = size - 1; + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); + } + + void dext (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); + guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); + + int lsb = pos; + int msbd = size - 1; + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); + } + + void dextm (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); + guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); + + int lsb = pos; + int msbd = size - 1 - 32; + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); + } + + void rotr (Register rd, Register rt, int sa) { + emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); + } + + void drotr (Register rd, Register rt, int sa) { + emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); + } + + void drotr32 (Register rd, Register rt, int sa) { + emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); + } + + void rotrv (Register rd, Register rt, Register rs) { + emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); + } + + void drotrv (Register rd, Register rt, Register rs) { + emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); + } + + void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } + void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } + void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } + void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } + void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } + void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } + void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } + void dsra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } + void dsrav (Register rd, 
Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } + void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } + void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } + void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } + void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } + void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } + + void b(int off) { beq(R0, R0, off); } + void b(address entry) { b(offset(entry)); } + void b(Label& L) { b(target(L)); } + + void j(address entry); + void jal(address entry); + + void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } + void jalr(Register rs) { jalr(RA, rs); } + void jalr() { jalr(RT9); } + + void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } + void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } + + void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } + void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } + + void lb (Register rt, Address src); + void lbu(Register rt, Address src); + void ld (Register rt, Address src); + void ldl(Register rt, Address src); + void ldr(Register rt, Address src); + void lh (Register rt, Address src); + void lhu(Register rt, Address src); + void ll (Register rt, Address src); + void 
lld(Register rt, Address src); + void lw (Register rt, Address src); + void lwl(Register rt, Address src); + void lwr(Register rt, Address src); + void lwu(Register rt, Address src); + void lea(Register rt, Address src); + void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } + + void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } + void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } + void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } + void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } + + void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } + void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } + + void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } + + void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } + void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } + + void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } + void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } + void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } + void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } + void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } + void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } + void srav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } + void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } + void srlv (Register 
rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } + + void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } + void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } + void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } + void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } + void sync () { + if (os::is_ActiveCoresMP()) + emit_long(0); + else + emit_long(sync_op); + } + void syscall(int code) { emit_long( (code<<6) | syscall_op ); } + + void sb(Register rt, Address dst); + void sc(Register rt, Address dst); + void scd(Register rt, Address dst); + void sd(Register rt, Address dst); + void sdl(Register rt, Address dst); + void sdr(Register rt, Address dst); + void sh(Register rt, Address dst); + void sw(Register rt, Address dst); + void swl(Register rt, Address dst); + void swr(Register rt, Address dst); + + void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } + void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } + void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } + void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } + void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } + void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } + void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } + void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } + void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } + void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } + void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } + void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } + + void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } + void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } + + void nop() { emit_long(0); } + + + + void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void ldc1(FloatRegister ft, Address src); + void lwc1(FloatRegister ft, Address src); + + //COP0 + void mfc0 
(Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } + void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } + // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet + void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } + void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } + //COP0 end + + + //COP1 + void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } + void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } + void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } + void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } + void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } + + void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } + void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } + void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } + void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } + + void bc1f (address entry) { bc1f(offset(entry)); } + void bc1fl(address entry) { bc1fl(offset(entry)); } + void bc1t (address entry) { bc1t(offset(entry)); } + void bc1tl(address entry) { bc1tl(offset(entry)); } + + void bc1f (Label& L) { bc1f(target(L)); } + void bc1fl(Label& L) { bc1fl(target(L)); } + void bc1t (Label& L) { bc1t(target(L)); } + void bc1tl(Label& L) { bc1tl(target(L)); } + +//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
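+// INSN_SINGLE(ft, fs, fd, op) expands to insn_F3RO(single_fmt, ft, fs, fd, op),
+// i.e. (cop1_op<<26)|(single_fmt<<21)|(ft<<16)|(fs<<11)|(fd<<6)|op; so add_s()
+// below emits the single-precision add with fadd_op as the function code.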
+#define INSN_SINGLE(r1, r2, r3, op) \ + { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} + void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} + void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} + void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} + void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} + void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} + void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} + void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} + void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} + void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} + void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} + void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} + void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} + void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} + void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} + void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} + //null + void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} + void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} + //null + void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} + void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} + //null + void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} + //null + void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} + void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} + void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} + //null + void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} + void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} + void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} + void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} + void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} + void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} + void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} + void c_ule_s (FloatRegister fs, 
FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} + void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} + void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} + void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} + void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} + void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} + void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} + void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} + void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} + +#undef INSN_SINGLE + + +//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. +#define INSN_DOUBLE(r1, r2, r3, op) \ + { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + + void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} + void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} + void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} + void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} + void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} + void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} + void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} + void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} + void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} + void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} + void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} + void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} + void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} + void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} + void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} + void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} + //null + void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { + assert(cc >= 0 && cc <= 7, "cc is 3 bits"); + emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} + void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} + void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} + //null + void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} + void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} + //null + void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} + void cvt_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} + //null + void 
cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} + //null + void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} + void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} + void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} + void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} + void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} + void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} + void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} + void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} + void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} + void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} + void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} + void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} + void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} + void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} + void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} + void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} + +#undef INSN_DOUBLE + + + //null + void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } + void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } + //null + void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } + void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } + //null + + +//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
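+// INSN_PS uses the same field packing as INSN_SINGLE/INSN_DOUBLE but with
+// ps_fmt, the MIPS paired-single format (two 32-bit floats in one 64-bit FPR).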
+#define INSN_PS(r1, r2, r3, op) \ + { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + + void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} + void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} + void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} + //null + void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} + void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} + void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} + //null + //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} + //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } + void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} + void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} + //null + void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} + //null + void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} + //null + void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} + void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} + void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} + void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} + void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} + void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} + void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} + void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} + void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} + void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} + void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} + void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} + void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} + void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} + void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} + void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} + void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} + void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} + void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} + void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} + //null +#undef INSN_PS + //COP1 end + + + //COP1X +//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
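+// INSN_COP1X passes fr in the slot insn_F3ROX() calls fmt, matching the MIPS-IV
+// COP1X layout; the resulting madd/msub/nmadd/nmsub forms compute
+// fd = fs * ft +/- fr (negated results for the nm* variants).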
+#define INSN_COP1X(r0, r1, r2, r3, op) \ + { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} + void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } + void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } + void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } + void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } + void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } + void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } + void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } + void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } + void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } + void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } + void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } + void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } +#undef INSN_COP1X + //COP1X end + + //SPECIAL2 +//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
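+// INSN_S2 packs rs/rt/rd around a SPECIAL2 function code; besides the standard
+// mul/madd/msub/clz/clo entries, this family carries the Loongson-specific gs*
+// multiply, divide and mod variants listed in special2_ops above.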
+#define INSN_S2(op) \ + { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} + + void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } + void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } + void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } + void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } + void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } + void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } + void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } + + void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } + void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } + void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } + void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} + void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } + void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } + void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } + void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } + void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } + void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } + void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } + void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } + void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } + void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } + void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } + void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } + void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } + void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } + void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } + void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } + +#undef INSN_S2 + + //SPECIAL3 +/* +// FIXME +#define is_0_to_32(a, b) \ + assert (a >= 0, " just a check"); \ + assert (a <= 0, " just a check"); \ + assert (b >= 0, " just a check"); \ + assert (b <= 0, 
" just a check"); \ + assert (a+b >= 0, " just a check"); \ + assert (a+b <= 0, " just a check"); + */ +#define is_0_to_32(a, b) + + void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } + void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } + void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } + void dins (Register rt, Register rs, int pos, int size) { + guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); + guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); + guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); + + emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); + } + + void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } + void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } + void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } + void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } + + void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } + void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } + void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } + void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } + void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } + void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } + + void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void sdc1(FloatRegister ft, Address dst); + void swc1(FloatRegister ft, Register 
base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } + void swc1(FloatRegister ft, Address dst); + + + static void print_instruction(int); + int patched_branch(int dest_pos, int inst, int inst_pos); + int branch_destination(int inst, int pos); + + // Loongson extension + + // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". + void gslble(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); + } + + void gslbgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); + } + + void gslhle(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); + } + + void gslhgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); + } + + void gslwle(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); + } + + void gslwgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); + } + + void gsldle(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); + } + + void gsldgt(Register rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); + } + + void gslwlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); + } + + void gslwgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); + } + + void gsldlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); + } + + void gsldgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); + } + + void gslq(Register rq, Register rt, Register base, int off) { + assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); + off = off >> 4; + assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); + } + + void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { + assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 
0"); + off = off >> 4; + assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); + emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); + } + + void gssble(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); + } + + void gssbgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); + } + + void gsshle(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); + } + + void gsshgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); + } + + void gsswle(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); + } + + void gsswgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); + } + + void gssdle(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); + } + + void gssdgt(Register rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); + } + + void gsswlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); + } + + void gsswgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); + } + + void gssdlec1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); + } + + void gssdgtc1(FloatRegister rt, Register base, Register bound) { + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); + } + + void gssq(Register rq, Register rt, Register base, int off) { + assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); + off = off >> 4; + assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); + } + + void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { + assert(!(off & 0xF), "gssqc1: the low 4 bits of off must be 0"); + off = off >> 4; + assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); + emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | 
(low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); + } + + //LDC2 & SDC2 +#define INSN(OPS, OP) \ + assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ + assert(UseLEXT1, "check UseLEXT1"); \ + emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ + ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); + +#define INSN_LDC2(NAME, op) \ + void NAME(Register rt, Register base, Register index, int off) { \ + INSN(gs_ldc2_op, op) \ + } + +#define INSN_LDC2_F(NAME, op) \ + void NAME(FloatRegister rt, Register base, Register index, int off) { \ + INSN(gs_ldc2_op, op) \ + } + +#define INSN_SDC2(NAME, op) \ + void NAME(Register rt, Register base, Register index, int off) { \ + INSN(gs_sdc2_op, op) \ + } + +#define INSN_SDC2_F(NAME, op) \ + void NAME(FloatRegister rt, Register base, Register index, int off) { \ + INSN(gs_sdc2_op, op) \ + } + +/* + void gslbx(Register rt, Register base, Register index, int off) { + assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); + assert(UseLEXT1, "check UseLEXT1"); + emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | + ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); + void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} + + INSN_LDC2(gslbx, gslbx_op) + INSN_LDC2(gslhx, gslhx_op) + INSN_LDC2(gslwx, gslwx_op) + INSN_LDC2(gsldx, gsldx_op) + INSN_LDC2_F(gslwxc1, gslwxc1_op) + INSN_LDC2_F(gsldxc1, gsldxc1_op) + + INSN_SDC2(gssbx, gssbx_op) + INSN_SDC2(gsshx, gsshx_op) + INSN_SDC2(gsswx, gsswx_op) + INSN_SDC2(gssdx, gssdx_op) + INSN_SDC2_F(gsswxc1, gsswxc1_op) + INSN_SDC2_F(gssdxc1, gssdxc1_op) +*/ + void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } + void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } + void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } + void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } + void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } + void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } + + void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } + void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } + void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } + void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } + void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } + void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } + +#undef INSN +#undef INSN_LDC2 +#undef INSN_LDC2_F +#undef INSN_SDC2 +#undef INSN_SDC2_F + + // cpucfg on Loongson CPUs above 3A4000 + void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} + + +public: + // Creation + Assembler(CodeBuffer* code) : AbstractAssembler(code) { +#ifdef CHECK_DELAY + delay_state = no_delay; +#endif + } + + // Decoding + static address locate_operand(address inst, WhichOperand which); + static address locate_next_instruction(address inst); +}; + + + +#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP diff --git 
a/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp b/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp new file mode 100644 index 00000000000..39aeb5509a7 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/assembler_mips.inline.hpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp new file mode 100644 index 00000000000..a4a1b28c2d1 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.cpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "interpreter/bytecodeInterpreter.hpp" +#include "interpreter/bytecodeInterpreter.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef TARGET_ARCH_MODEL_mips_32 +# include "interp_masm_mips_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_mips_64 +# include "interp_masm_mips_64.hpp" +#endif + +#ifdef CC_INTERP + +#endif // CC_INTERP (all) diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp new file mode 100644 index 00000000000..aac8b7a2b7f --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.hpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP +#define CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP + +// Platform specific for C++ based Interpreter +#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */ + +private: + + // save the bottom of the stack after frame manager setup. For ease of restoration after return + // from recursive interpreter call + intptr_t* _frame_bottom; /* saved bottom of frame manager frame */ + intptr_t* _last_Java_pc; /* pc to return to in frame manager */ + intptr_t* _sender_sp; /* sender's sp before stack (locals) extension */ + interpreterState _self_link; /* Previous interpreter state */ /* sometimes points to self??? */ + double _native_fresult; /* save result of native calls that might return floats */ + intptr_t _native_lresult; /* save result of native calls that might return handle/longs */ +public: + + static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp); + inline intptr_t* sender_sp() { + return _sender_sp; + } + + +#define SET_LAST_JAVA_FRAME() + +#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set_flags(0); + +/* + * Macros for accessing the stack. 
+ */ +#undef STACK_INT +#undef STACK_FLOAT +#undef STACK_ADDR +#undef STACK_OBJECT +#undef STACK_DOUBLE +#undef STACK_LONG + +// JavaStack Implementation + +#define GET_STACK_SLOT(offset) (*((intptr_t*) &topOfStack[-(offset)])) +#define STACK_SLOT(offset) ((address) &topOfStack[-(offset)]) +#define STACK_ADDR(offset) (*((address *) &topOfStack[-(offset)])) +#define STACK_INT(offset) (*((jint*) &topOfStack[-(offset)])) +#define STACK_FLOAT(offset) (*((jfloat *) &topOfStack[-(offset)])) +#define STACK_OBJECT(offset) (*((oop *) &topOfStack [-(offset)])) +#define STACK_DOUBLE(offset) (((VMJavaVal64*) &topOfStack[-(offset)])->d) +#define STACK_LONG(offset) (((VMJavaVal64 *) &topOfStack[-(offset)])->l) + +#define SET_STACK_SLOT(value, offset) (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value)) +#define SET_STACK_ADDR(value, offset) (*((address *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_INT(value, offset) (*((jint *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_FLOAT(value, offset) (*((jfloat *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value)) +#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value)) +#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_STACK_LONG(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value)) +#define SET_STACK_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->l = \ + ((VMJavaVal64*)(addr))->l) +// JavaLocals implementation + +#define LOCALS_SLOT(offset) ((intptr_t*)&locals[-(offset)]) +#define LOCALS_ADDR(offset) ((address)locals[-(offset)]) +#define LOCALS_INT(offset) (*((jint*)&locals[-(offset)])) +#define LOCALS_FLOAT(offset) (*((jfloat*)&locals[-(offset)])) +#define LOCALS_OBJECT(offset) ((oop)locals[-(offset)]) +#define LOCALS_DOUBLE(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->d) +#define LOCALS_LONG(offset) (((VMJavaVal64*)&locals[-((offset) + 1)])->l) +#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)])) +#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)])) + +#define SET_LOCALS_SLOT(value, offset) (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value)) +#define SET_LOCALS_ADDR(value, offset) (*((address *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_INT(value, offset) (*((jint *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_FLOAT(value, offset) (*((jfloat *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_OBJECT(value, offset) (*((oop *)&locals[-(offset)]) = (value)) +#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value)) +#define SET_LOCALS_LONG(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value)) +#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \ + ((VMJavaVal64*)(addr))->d) +#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \ + ((VMJavaVal64*)(addr))->l) + +#endif // CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp new file mode 100644 index 00000000000..8ce77ab92ff --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytecodeInterpreter_mips.inline.hpp @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP +#define CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP + +// Inline interpreter functions for MIPS + +inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } +inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } +inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } +inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } +inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); } + +inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } + +inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? direction : 0); + +} + +inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { + // x86 can do unaligned copies but not 64bits at a time + to[0] = from[0]; to[1] = from[1]; +} + +// The long operations depend on compiler support for "long long" on x86 + +inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { + return op1 + op2; +} + +inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { + return op1 & op2; +} + +inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { + // QQQ what about check and throw... + return op1 / op2; +} + +inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { + return op1 * op2; +} + +inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { + return op1 | op2; +} + +inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { + return op1 - op2; +} + +inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { + return op1 ^ op2; +} + +inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { + return op1 % op2; +} + +inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { + // CVM did this 0x3f mask, is the really needed??? 
QQQ + return ((unsigned long long) op1) >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { + return op1 >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { + return op1 << (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { + return -op; +} + +inline jlong BytecodeInterpreter::VMlongNot(jlong op) { + return ~op; +} + +inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { + return (op <= 0); +} + +inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { + return (op >= 0); +} + +inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { + return (op == 0); +} + +inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { + return (op1 == op2); +} + +inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { + return (op1 != op2); +} + +inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { + return (op1 >= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { + return (op1 <= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { + return (op1 < op2); +} + +inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { + return (op1 > op2); +} + +inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { + return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); +} + +// Long conversions + +inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { + return (jfloat) val; +} + +inline jint BytecodeInterpreter::VMlong2Int(jlong val) { + return (jint) val; +} + +// Double Arithmetic + +inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { + return op1 + op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { + // Divide by zero... QQQ + return op1 / op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { + return op1 * op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { + return -op; +} + +inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { + return fmod(op1, op2); +} + +inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { + return op1 - op2; +} + +inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? 
direction : 0); +} + +// Double Conversions + +inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { + return (jfloat) val; +} + +// Float Conversions + +inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { + return (jdouble) op; +} + +// Integer Arithmetic + +inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { + return op1 + op2; +} + +inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { + return op1 & op2; +} + +inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { + // it's possible we could catch this special case implicitly + if ((juint)op1 == 0x80000000 && op2 == -1) return op1; + else return op1 / op2; +} + +inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { + return op1 * op2; +} + +inline jint BytecodeInterpreter::VMintNeg(jint op) { + return -op; +} + +inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { + return op1 | op2; +} + +inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { + // it's possible we could catch this special case implicitly + if ((juint)op1 == 0x80000000 && op2 == -1) return 0; + else return op1 % op2; +} + +inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { + return op1 << op2; +} + +inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { + return op1 >> (op2 & 0x1f); // QQ op2 & 0x1f?? +} + +inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { + return op1 - op2; +} + +inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { + return ((juint) op1) >> (op2 & 0x1f); // QQ op2 & 0x1f?? +} + +inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { + return op1 ^ op2; +} + +inline jdouble BytecodeInterpreter::VMint2Double(jint val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMint2Float(jint val) { + return (jfloat) val; +} + +inline jlong BytecodeInterpreter::VMint2Long(jint val) { + return (jlong) val; +} + +inline jchar BytecodeInterpreter::VMint2Char(jint val) { + return (jchar) val; +} + +inline jshort BytecodeInterpreter::VMint2Short(jint val) { + return (jshort) val; +} + +inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { + return (jbyte) val; +} + +#endif // CPU_MIPS_VM_BYTECODEINTERPRETER_MIPS_INLINE_HPP diff --git a/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp b/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp new file mode 100644 index 00000000000..61efd1f5611 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytecodes_mips.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/bytecodes.hpp" + + +void Bytecodes::pd_initialize() { + // No mips specific initialization +} + + +Bytecodes::Code Bytecodes::pd_base_code_for(Code code) { + // No mips specific bytecodes + return code; +} diff --git a/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp b/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp new file mode 100644 index 00000000000..25a9562acd5 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytecodes_mips.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_BYTECODES_MIPS_HPP +#define CPU_MIPS_VM_BYTECODES_MIPS_HPP + +// No Loongson specific bytecodes + +#endif // CPU_MIPS_VM_BYTECODES_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/bytes_mips.hpp b/hotspot/src/cpu/mips/vm/bytes_mips.hpp new file mode 100644 index 00000000000..515ffad4b07 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/bytes_mips.hpp @@ -0,0 +1,193 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP +#define CPU_MIPS_VM_BYTES_MIPS_HPP + +#include "memory/allocation.hpp" + +class Bytes: AllStatic { + public: + // Returns true if the byte ordering used by Java is different from the native byte ordering + // of the underlying machine. For example, this is true for Intel x86, but false for Solaris + // on Sparc. + // we use mipsel, so return true + static inline bool is_Java_byte_ordering_different(){ return true; } + + + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering + // (no special code is needed since x86 CPUs can access unaligned data) + static inline u2 get_native_u2(address p) { + if ((intptr_t)p & 0x1) { + return ((u2)p[1] << 8) | (u2)p[0]; + } else { + return *(u2*)p; + } + } + + static inline u4 get_native_u4(address p) { + if ((intptr_t)p & 3) { + u4 res; + __asm__ __volatile__ ( + " .set push\n" + " .set mips64\n" + " .set noreorder\n" + + " lwr %[res], 0(%[addr]) \n" + " lwl %[res], 3(%[addr]) \n" + + " .set pop" + : [res] "=&r" (res) + : [addr] "r" (p) + : "memory" + ); + return res; + } else { + return *(u4*)p; + } + } + + static inline u8 get_native_u8(address p) { + u8 res; + u8 temp; + // u4 tp;//tmp register + __asm__ __volatile__ ( + " .set push\n" + " .set mips64\n" + " .set noreorder\n" + " .set noat\n" + " andi $1,%[addr],0x7 \n" + " beqz $1,1f \n" + " nop \n" + " ldr %[temp], 0(%[addr]) \n" + " ldl %[temp], 7(%[addr]) \n" + " b 2f \n" + " nop \n" + " 1:\t ld %[temp],0(%[addr]) \n" + " 2:\t sd %[temp], %[res] \n" + + " .set at\n" + " .set pop\n" + : [addr]"=r"(p), [temp]"=r" (temp) + : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) + : "memory" + ); + + return res; + } + + //use mips unaligned load instructions + static inline void put_native_u2(address p, u2 x) { + if((intptr_t)p & 0x1) { + p[0] = (u_char)(x); + p[1] = (u_char)(x>>8); + } else { + *(u2*)p = x; + } + } + + static inline void put_native_u4(address p, u4 x) { + // refer to sparc implementation. + // Note that sparc is big-endian, while mips is little-endian + switch ( intptr_t(p) & 3 ) { + case 0: *(u4*)p = x; + break; + + case 2: ((u2*)p)[1] = x >> 16; + ((u2*)p)[0] = x; + break; + + default: ((u1*)p)[3] = x >> 24; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[0] = x; + break; + } + } + + static inline void put_native_u8(address p, u8 x) { + // refer to sparc implementation. + // Note that sparc is big-endian, while mips is little-endian + switch ( intptr_t(p) & 7 ) { + case 0: *(u8*)p = x; + break; + + case 4: ((u4*)p)[1] = x >> 32; + ((u4*)p)[0] = x; + break; + + case 2: ((u2*)p)[3] = x >> 48; + ((u2*)p)[2] = x >> 32; + ((u2*)p)[1] = x >> 16; + ((u2*)p)[0] = x; + break; + + default: ((u1*)p)[7] = x >> 56; + ((u1*)p)[6] = x >> 48; + ((u1*)p)[5] = x >> 40; + ((u1*)p)[4] = x >> 32; + ((u1*)p)[3] = x >> 24; + ((u1*)p)[2] = x >> 16; + ((u1*)p)[1] = x >> 8; + ((u1*)p)[0] = x; + } + } + + + // Efficient reading and writing of unaligned unsigned data in Java + // byte ordering (i.e. big-endian ordering). Byte-order reversal is + // needed since MIPS64EL CPUs use little-endian format. 
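+  // Worked example: for p[0] = 0xCA, p[1] = 0xFE, get_native_u2(p) reads the little-endian
+  // value 0xFECA, and get_Java_u2(p) byte-swaps it to 0xCAFE, the big-endian order used in
+  // class files.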
+ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } + + + // Efficient swapping of byte ordering + static inline u2 swap_u2(u2 x); // compiler-dependent implementation + static inline u4 swap_u4(u4 x); // compiler-dependent implementation + static inline u8 swap_u8(u8 x); +}; + + +// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] +#ifdef TARGET_OS_ARCH_linux_mips +# include "bytes_linux_mips.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_solaris_mips +# include "bytes_solaris_mips.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_windows_mips +# include "bytes_windows_mips.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_bsd_mips +# include "bytes_bsd_mips.inline.hpp" +#endif + + +#endif // CPU_MIPS_VM_BYTES_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp b/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp new file mode 100644 index 00000000000..f254e07abdd --- /dev/null +++ b/hotspot/src/cpu/mips/vm/c2_globals_mips.hpp @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. 
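+// Each define_pd_global below supplies the platform default for a flag declared as
+// platform-dependent in c2_globals.hpp; the MIPS64 branch raises the register-pressure,
+// code-cache and MaxRAM defaults relative to the 32-bit fallback values further down.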
+define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ResizeTLAB, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, true); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +#ifdef CC_INTERP +define_pd_global(bool, ProfileInterpreter, false); +#else +define_pd_global(bool, ProfileInterpreter, true); +#endif // CC_INTERP +define_pd_global(bool, TieredCompilation, false); // Disable C1 in server JIT +define_pd_global(intx, CompileThreshold, 10000); +define_pd_global(intx, BackEdgeThreshold, 100000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 3); +define_pd_global(intx, FLOATPRESSURE, 6); +define_pd_global(intx, FreqInlineSize, 325); +define_pd_global(intx, MinJumpTableSize, 10); +#ifdef MIPS64 +define_pd_global(intx, INTPRESSURE, 13); +define_pd_global(intx, InteriorEntryAlignment, 16); +define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +define_pd_global(intx, LoopUnrollLimit, 60); +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 64*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 128ULL*G); +#else +define_pd_global(intx, INTPRESSURE, 6); +define_pd_global(intx, InteriorEntryAlignment, 4); +define_pd_global(intx, NewSizeThreadIncrease, 4*K); +define_pd_global(intx, LoopUnrollLimit, 50); // Design center runs on 1.3.1 +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(intx, InitialCodeCacheSize, 2304*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(intx, CodeCacheExpansionSize, 32*K); + +// Ergonomics related flags +define_pd_global(uint64_t,MaxRAM, 4ULL*G); +#endif // MIPS64 +define_pd_global(intx, RegisterCostAreaRatio, 16000); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoScheduling, false); +define_pd_global(bool, OptoBundling, false); + +define_pd_global(intx, ReservedCodeCacheSize, 120*M); +define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed on x86. + +// Heap related flags +define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/c2_init_mips.cpp b/hotspot/src/cpu/mips/vm/c2_init_mips.cpp new file mode 100644 index 00000000000..e6d5815f424 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/c2_init_mips.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" + +// processor dependent initialization for mips + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); +} diff --git a/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp b/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp new file mode 100644 index 00000000000..1836b7a9214 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/codeBuffer_mips.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2017, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp b/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp new file mode 100644 index 00000000000..8ffaaaf841e --- /dev/null +++ b/hotspot/src/cpu/mips/vm/compiledIC_mips.cpp @@ -0,0 +1,173 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// Release the CompiledICHolder* associated with this call site is there is one. +void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + if (is_icholder_entry(call->destination())) { + NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value()); + InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data()); + } +} + +bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) { + // This call site might have become stale so inspect it carefully. + NativeCall* call = nativeCall_at(call_site->addr()); + return is_icholder_entry(call->destination()); +} + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) { + + address mark = cbuf.insts_mark(); // get mark within main instrs section + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); + if (base == NULL) return NULL; // CodeBuffer::expand failed + // static stub relocation stores the instruction address of the call + + __ relocate(static_stub_Relocation::spec(mark), 0); + + // Code stream for loading method may be changed. + __ synci(R0, 0); + + // Rmethod contains methodOop, it should be relocated for GC + // static stub relocation also tags the methodOop in the code-stream. + __ mov_metadata(Rmethod, NULL); + // This is recognized as unresolved by relocs/nativeInst/ic code + + __ relocate(relocInfo::runtime_call_type); + + cbuf.set_insts_mark(); + address call_pc = (address)-1; + __ patchable_jump(call_pc); + __ align(16); + // Update current stubs pointer and restore code_end. + __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { + int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; + return round_to(size, 16); +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 16; +} + +void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) { + address stub = find_stub(); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. 
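+  // As laid out by emit_to_interp_stub() above, the stub is: one nop-sized instruction (the
+  // synci), then the NativeMovConstReg that materializes the callee Method*, then the
+  // patchable jump; the nop_instruction_size offset below skips past the synci.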
+ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef MIPS64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + + assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), + "a) MT-unsafe modification of inline cache"); + assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, + "b) MT-unsafe modification of inline cache"); + + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef MIPS64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + method_holder->set_data(0); + jump->set_jump_destination((address)-1); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledStaticCall::verify() { + // Verify call. + NativeCall::verify(); + if (os::is_MP()) { + verify_alignment(); + } + + // Verify stub. + address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); +#ifndef MIPS64 + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); +#else + NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); +#endif + + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT diff --git a/hotspot/src/cpu/mips/vm/copy_mips.hpp b/hotspot/src/cpu/mips/vm/copy_mips.hpp new file mode 100644 index 00000000000..4442e1dc716 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/copy_mips.hpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_MIPS_VM_COPY_MIPS_HPP
+#define CPU_MIPS_VM_COPY_MIPS_HPP
+
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+#ifdef TARGET_OS_ARCH_linux_mips
+# include "copy_linux_mips.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_solaris_mips
+# include "copy_solaris_mips.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_windows_mips
+# include "copy_windows_mips.inline.hpp"
+#endif
+#ifdef TARGET_OS_ARCH_bsd_mips
+# include "copy_bsd_mips.inline.hpp"
+#endif
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+
+// Template for atomic, element-wise copy.
+template <typename T>
+static void copy_conjoint_atomic(const T* from, T* to, size_t count) {
+  if (from > to) {
+    while (count-- > 0) {
+      // Copy forwards
+      *to++ = *from++;
+    }
+  } else {
+    from += count - 1;
+    to += count - 1;
+    while (count-- > 0) {
+      // Copy backwards
+      *to-- = *from--;
+    }
+  }
+}
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  julong* to = (julong*) tohw;
+  julong v = ((julong) value << 32) | value;
+  while (count-- > 0) {
+    *to++ = v;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);
+}
+
+#endif //CPU_MIPS_VM_COPY_MIPS_HPP
diff --git a/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp
new file mode 100644
index 00000000000..37bd03b00b0
--- /dev/null
+++ b/hotspot/src/cpu/mips/vm/cppInterpreterGenerator_mips.hpp
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ * + */ + +#ifndef CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP +#define CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP + + protected: + +#if 0 + address generate_asm_interpreter_entry(bool synchronized); + address generate_native_entry(bool synchronized); + address generate_abstract_entry(void); + address generate_math_entry(AbstractInterpreter::MethodKind kind); + address generate_empty_entry(void); + address generate_accessor_entry(void); + void lock_method(void); + void generate_stack_overflow_check(void); + + void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); + void generate_counter_overflow(Label* do_continue); +#endif + + void generate_more_monitors(); + void generate_deopt_handling(); + address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only + void generate_compute_interpreter_state(const Register state, + const Register prev_state, + const Register sender_sp, + bool native); // C++ interpreter only + +#endif // CPU_MIPS_VM_CPPINTERPRETERGENERATOR_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp new file mode 100644 index 00000000000..1f8d75d593a --- /dev/null +++ b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.cpp @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2007, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/cppInterpreter.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef SHARK +#include "shark/shark_globals.hpp" +#endif + +#ifdef CC_INTERP + +// Routine exists to make tracebacks look decent in debugger +// while "shadow" interpreter frames are on stack. 
It is also +// used to distinguish interpreter frames. + +extern "C" void RecursiveInterpreterActivation(interpreterState istate) { + ShouldNotReachHere(); +} + +bool CppInterpreter::contains(address pc) { + Unimplemented(); +} + +#define STATE(field_name) Lstate, in_bytes(byte_offset_of(BytecodeInterpreter, field_name)) +#define __ _masm-> + +Label frame_manager_entry; +Label fast_accessor_slow_entry_path; // fast accessor methods need to be able to jmp to unsynchronized + // c++ interpreter entry point this holds that entry point label. + +static address unctrap_frame_manager_entry = NULL; + +static address interpreter_return_address = NULL; +static address deopt_frame_manager_return_atos = NULL; +static address deopt_frame_manager_return_btos = NULL; +static address deopt_frame_manager_return_itos = NULL; +static address deopt_frame_manager_return_ltos = NULL; +static address deopt_frame_manager_return_ftos = NULL; +static address deopt_frame_manager_return_dtos = NULL; +static address deopt_frame_manager_return_vtos = NULL; + +const Register prevState = G1_scratch; + +void InterpreterGenerator::save_native_result(void) { + Unimplemented(); +} + +void InterpreterGenerator::restore_native_result(void) { + Unimplemented(); +} + +// A result handler converts/unboxes a native call result into +// a java interpreter/compiler result. The current frame is an +// interpreter frame. The activation frame unwind code must be +// consistent with that of TemplateTable::_return(...). In the +// case of native methods, the caller's SP was not modified. +address CppInterpreterGenerator::generate_result_handler_for(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) { + Unimplemented(); +} + +address CppInterpreter::return_entry(TosState state, int length) { + Unimplemented(); +} + +address CppInterpreter::deopt_entry(TosState state, int length) { + Unimplemented(); +} + +void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { + Unimplemented(); +} + +address InterpreterGenerator::generate_empty_entry(void) { + Unimplemented(); +} + +address InterpreterGenerator::generate_accessor_entry(void) { + Unimplemented(); +} + +address InterpreterGenerator::generate_native_entry(bool synchronized) { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_compute_interpreter_state(const Register state, + const Register prev_state, + bool native) { + Unimplemented(); +} + +void InterpreterGenerator::lock_method(void) { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_deopt_handling() { + Unimplemented(); +} + +void CppInterpreterGenerator::generate_more_monitors() { + Unimplemented(); +} + + +static address interpreter_frame_manager = NULL; + +void CppInterpreterGenerator::adjust_callers_stack(Register args) { + Unimplemented(); +} + +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + Unimplemented(); +} + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : CppInterpreterGenerator(code) { + Unimplemented(); +} + + +static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { + Unimplemented(); +} + +int 
AbstractInterpreter::size_top_interpreter_activation(methodOop method) { + Unimplemented(); +} + +void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill, + frame* caller, + frame* current, + methodOop method, + intptr_t* locals, + intptr_t* stack, + intptr_t* stack_base, + intptr_t* monitor_base, + intptr_t* frame_bottom, + bool is_top_frame + ) +{ + Unimplemented(); +} + +void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp) { + Unimplemented(); +} + + +int AbstractInterpreter::layout_activation(methodOop method, + int tempcount, // Number of slots on java expression stack in use + int popframe_extra_args, + int moncount, // Number of active monitors + int callee_param_size, + int callee_locals_size, + frame* caller, + frame* interpreter_frame, + bool is_top_frame) { + Unimplemented(); +} + +#endif // CC_INTERP diff --git a/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp new file mode 100644 index 00000000000..49c47330495 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/cppInterpreter_mips.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP +#define CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreter to get the VM to print out the size. + // Max size with JVMTI and TaggedStackInterpreter + + // QQQ this is proably way too large for c++ interpreter + + // The sethi() instruction generates lots more instructions when shell + // stack limit is unlimited, so that's why this is much bigger. + const static int InterpreterCodeSize = 210 * K; + +#endif // CPU_MIPS_VM_CPPINTERPRETER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/debug_mips.cpp b/hotspot/src/cpu/mips/vm/debug_mips.cpp new file mode 100644 index 00000000000..50de03653b1 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/debug_mips.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nmethod.hpp" +#include "runtime/frame.hpp" +#include "runtime/init.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" +#include "utilities/top.hpp" + +#ifndef PRODUCT + +void pd_ps(frame f) { + intptr_t* sp = f.sp(); + intptr_t* prev_sp = sp - 1; + intptr_t *pc = NULL; + intptr_t *next_pc = NULL; + int count = 0; + tty->print("register window backtrace from %#lx:\n", p2i(sp)); +} + +// This function is used to add platform specific info +// to the error reporting code. + +void pd_obfuscate_location(char *buf,int buflen) {} + +#endif // PRODUCT diff --git a/hotspot/src/cpu/mips/vm/depChecker_mips.cpp b/hotspot/src/cpu/mips/vm/depChecker_mips.cpp new file mode 100644 index 00000000000..756ccb68f9c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/depChecker_mips.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/disassembler.hpp" +#include "depChecker_mips.hpp" + +// Nothing to do on mips diff --git a/hotspot/src/cpu/mips/vm/depChecker_mips.hpp b/hotspot/src/cpu/mips/vm/depChecker_mips.hpp new file mode 100644 index 00000000000..11e52b4e8f8 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/depChecker_mips.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP + +// Nothing to do on MIPS + +#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/disassembler_mips.hpp b/hotspot/src/cpu/mips/vm/disassembler_mips.hpp new file mode 100644 index 00000000000..c5f3a8888dd --- /dev/null +++ b/hotspot/src/cpu/mips/vm/disassembler_mips.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return "gpr-names=64"; + } + +#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/frame_mips.cpp b/hotspot/src/cpu/mips/vm/frame_mips.cpp new file mode 100644 index 00000000000..1c928976fc3 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/frame_mips.cpp @@ -0,0 +1,711 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_mips.inline.hpp" + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support +// for Profiling - acting on another frame. walks sender frames +// if valid. +// frame profile_find_Java_sender_frame(JavaThread *thread); + +bool frame::safe_for_sender(JavaThread *thread) { + address sp = (address)_sp; + address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) + bool sp_safe = (sp < thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size); + + + if (!sp_safe) { + return false; + } + + // unextended sp must be within the stack and above or equal sp + bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && + (unextended_sp >= sp); + + if (!unextended_sp_safe) { + return false; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL ) { + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. 
+ + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. + return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address) this->fp()[return_addr_offset]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; + saved_fp = (intptr_t*) this->fp()[link_offset]; + + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; + } + + sender_sp = _unextended_sp + _cb->frame_size(); + sender_unextended_sp = sender_sp; + // On MIPS the return_address is always the word on the stack + sender_pc = (address) *(sender_sp-1); + // Note: frame::sender_sp_offset is only valid for compiled frame + saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // FP is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP + // is really a frame pointer. 
+ + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); + + return jcw_safe; + } + + if (sender_blob->is_nmethod()) { + nmethod* nm = sender_blob->as_nmethod_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { + return false; + } + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_nmethod(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + + if (!sender_blob->is_nmethod()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + // Note: fp == NULL is not really a prerequisite for this to be safe to + // walk for c2. However we've modified the code such that if we get + // a failure with fp != NULL that we then try with FP == NULL. + // This is basically to mimic what a last_frame would look like if + // c2 had generated it. + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + + if ( (address) this->fp()[return_addr_offset] == NULL) return false; + + + // could try and do some more potential verification of native frame if we could think of some... 
+ + return true; + +} + +void frame::patch_pc(Thread* thread, address pc) { + assert(_cb == CodeCache::find_blob(pc), "unexpected pc"); + address* pc_addr = &(((address*) sp())[-1]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + + // Either the return address is the original one or we are going to + // patch in the same address that's already there. + assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + _cb = CodeCache::find_blob(pc); + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +#ifdef CC_INTERP +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + // QQQ why does this specialize method exist if frame::sender_sp() does same thing? + // seems odd and if we always know interpreted vs. non then sender_sp() is really + // doing too much work. + return get_interpreterState()->sender_sp(); +} + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return get_interpreterState()->monitor_base(); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + return (BasicObjectLock*) get_interpreterState()->stack_base(); +} + +#else // CC_INTERP + +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(interpreter_frame_sender_sp_offset); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); + // make sure the pointer points inside the frame + assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); + assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* sp) { + *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; +} +#endif // CC_INTERP + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from 
C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + if (jfa->last_Java_pc() != NULL ) { + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; + } + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); + return fr; +} + +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // sp is the raw sp from the sender after adapter or interpreter extension + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). + intptr_t* unextended_sp = interpreter_frame_sender_sp(); + + // The interpreter and compiler(s) always save FP in a known + // location on entry. We must record where that location is + // so this if FP was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves FP if we record where it is then + // we don't have to always save FP on entry and exit to c2 compiled + // code, on entry will be enough. +#ifdef COMPILER2 + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); + } +#endif /* COMPILER2 */ + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + + +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. The unextended SP might also be the saved SP +// for MethodHandle call sites. +#ifdef ASSERT +void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. + fr._unextended_sp = unextended_sp; + + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); + assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); +} +#endif + + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { + // On MIPS, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); + if (sender_nm != NULL) { + // If the sender PC is a deoptimization point, get the original PC. + if (sender_nm->is_deopt_entry(_pc) || + sender_nm->is_deopt_mh_entry(_pc)) { + DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp)); + } + } +} + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // The interpreter and compiler(s) always save fp in a known + // location on entry. We must record where that location is + // so that if fp was live on callout from c2 we can find + // the saved copy no matter what it called. 
+ + // Since the interpreter always saves fp if we record where it is then + // we don't have to always save fp on entry and exit to c2 compiled + // code, on entry will be enough. + map->set_location(FP->as_VMReg(), (address) link_addr); + // this is weird "H" ought to be at a higher address however the + // oopMaps seems to have the "H" regs at the same address and the + // vanilla register. + // XXXX make this go away + if (true) { + map->set_location(FP->as_VMReg()->next(), (address) link_addr); + } +} + +//------------------------------sender_for_compiled_frame----------------------- +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + + // frame owned by optimizing compiler + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + + intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = sender_sp; + +#ifdef ASSERT + const bool c1_compiled = _cb->is_compiled_by_c1(); + bool native = _cb->is_nmethod() && ((nmethod*)_cb)->is_native_method(); + if (c1_compiled && native) { + assert(sender_sp == fp() + frame::sender_sp_offset, "incorrect frame size"); + } +#endif // ASSERT + // On Intel the return_address is always the word on the stack + // the fp in compiler points to sender fp, but in interpreter, fp points to return address, + // so getting sender for compiled frame is not same as interpreter frame. + // we hard code here temporarily + // spark + address sender_pc = (address) *(sender_sp-1); + + intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); + + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of epb there is no oopmap + // for it so we must fill in its location as if there was an oopmap entry + // since if our caller was compiled code there could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + assert(sender_sp != sp(), "must have changed"); + return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + if (is_interpreted_frame()) return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. 
+ return frame(sender_sp(), link(), sender_pc()); +} + + +bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) { + assert(is_interpreted_frame(), "must be interpreter frame"); + Method* method = interpreter_frame_method(); + // When unpacking an optimized frame the frame pointer is + // adjusted with: + int diff = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + printf("^^^^^^^^^^^^^^^adjust fp in deopt fp = 0%lx \n", (intptr_t)(fp - diff)); + return _fp == (fp - diff); +} + +void frame::pd_gc_epilog() { + // nothing done here now +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { +// QQQ +#ifdef CC_INTERP +#else + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + interpreter_frame_initial_sp_offset < sp()) { + return false; + } + // These are hacks to keep us out of trouble. + // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + + // first the method + + Method* m = *interpreter_frame_method_addr(); + + // validate the method we'd find in this potential sender + if (!m->is_valid_method()) return false; + + // stack frames shouldn't be much larger than max_stack elements + + //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { + if (fp() - sp() > 4096) { // stack frames shouldn't be large. + return false; + } + + // validate bci/bcx + + intptr_t bcx = interpreter_frame_bcx(); + if (m->validate_bci_from_bcx(bcx) < 0) { + return false; + } + + // validate ConstantPoolCache* + + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + + if (cp == NULL || !cp->is_metaspace_object()) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + + if (locals > thread->stack_base() || locals < (address) fp()) return false; + + // We'd have to be pretty unlucky to be mislead at this point + +#endif // CC_INTERP + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { +#ifdef CC_INTERP + // Needed for JVMTI. The result should always be in the interpreterState object + assert(false, "NYI"); + interpreterState istate = get_interpreterState(); +#endif // CC_INTERP + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr; + if (method->is_native()) { + // Prior to calling into the runtime to report the method_exit the possible + // return value is pushed to the native stack. If the result is a jfloat/jdouble + // then ST0 is saved. See the note in generate_native_result + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + tos_addr += 2; + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { +#ifdef CC_INTERP + obj = istate->_oop_temp; +#else + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); +#endif // CC_INTERP + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; + } + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::name##_offset, #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdx); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcx); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} +#endif + +intptr_t *frame::initial_deoptimization_info() { + // used to reset the saved FP + return fp(); +} + +intptr_t* frame::real_fp() const { + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(! is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. +frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} +#endif diff --git a/hotspot/src/cpu/mips/vm/frame_mips.hpp b/hotspot/src/cpu/mips/vm/frame_mips.hpp new file mode 100644 index 00000000000..9e684a8dc34 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/frame_mips.hpp @@ -0,0 +1,229 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP +#define CPU_MIPS_VM_FRAME_MIPS_HPP + +#include "runtime/synchronizer.hpp" +#include "utilities/top.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, fp, sp} +// ------------------------------ Asm interpreter ---------------------------------------- +// Layout of asm interpreter frame: +// [expression stack ] * <- sp +// [monitors ] \ +// ... | monitor block size +// [monitors ] / +// [monitor block size ] +// [byte code index/pointr] = bcx() bcx_offset +// [pointer to locals ] = locals() locals_offset +// [constant pool cache ] = cache() cache_offset +// [methodData ] = mdp() mdx_offset +// [methodOop ] = method() method_offset +// [last sp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset +// [old frame pointer ] <- fp = link() +// [return pc ] +// [oop temp ] (only for native calls) +// [locals and parameters ] +// <- sender sp +// ------------------------------ Asm interpreter ---------------------------------------- + +// ------------------------------ C++ interpreter ---------------------------------------- +// +// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) +// +// <- SP (current sp) +// [local variables ] BytecodeInterpreter::run local variables +// ... BytecodeInterpreter::run local variables +// [local variables ] BytecodeInterpreter::run local variables +// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] +// [return pc ] (return to frame manager) +// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- +// [expression stack ] <- last_Java_sp | +// [... ] * <- interpreter_state.stack | +// [expression stack ] * <- interpreter_state.stack_base | +// [monitors ] \ | +// ... | monitor block size | +// [monitors ] / <- interpreter_state.monitor_base | +// [struct interpretState ] <-----------------------------------------| +// [return pc ] (return to callee of frame manager [1] +// [locals and parameters ] +// <- sender sp + +// [1] When the c++ interpreter calls a new method it returns to the frame +// manager which allocates a new frame on the stack. In that case there +// is no real callee of this newly allocated frame. The frame manager is +// aware of the additional frame(s) and will pop them as nested calls +// complete. Howevers tTo make it look good in the debugger the frame +// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation +// with a fake interpreter_state* parameter to make it easy to debug +// nested calls. + +// Note that contrary to the layout for the assembly interpreter the +// expression stack allocated for the C++ interpreter is full sized. +// However this is not as bad as it seems as the interpreter frame_manager +// will truncate the unused space on succesive method calls. +// +// ------------------------------ C++ interpreter ---------------------------------------- + +// Layout of interpreter frame: +// +// [ monitor entry ] <--- sp +// ... 
+// [ monitor entry ] +// -9 [ monitor block top ] ( the top monitor entry ) +// -8 [ byte code pointer ] (if native, bcp = 0) +// -7 [ constant pool cache ] +// -6 [ methodData ] mdx_offset(not core only) +// -5 [ mirror ] +// -4 [ methodOop ] +// -3 [ locals offset ] +// -2 [ last_sp ] +// -1 [ sender's sp ] +// 0 [ sender's fp ] <--- fp +// 1 [ return address ] +// 2 [ oop temp offset ] (only for native calls) +// 3 [ result handler offset ] (only for native calls) +// 4 [ result type info ] (only for native calls) +// [ local var m-1 ] +// ... +// [ local var 0 ] +// [ argumnet word n-1 ] <--- ( sender's sp ) +// ... +// [ argument word 0 ] <--- S7 + + public: + enum { + pc_return_offset = 0, + // All frames + link_offset = 0, + return_addr_offset = 1, + // non-interpreter frames + sender_sp_offset = 2, + +#ifndef CC_INTERP + + // Interpreter frames + interpreter_frame_return_addr_offset = 1, + interpreter_frame_result_handler_offset = 3, // for native calls only + interpreter_frame_oop_temp_offset = 2, // for native calls only + + interpreter_frame_sender_fp_offset = 0, + interpreter_frame_sender_sp_offset = -1, + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_mdx_offset = interpreter_frame_method_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mdx_offset - 1, + interpreter_frame_bcx_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcx_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + +#endif // CC_INTERP + + // Entry frames + entry_frame_call_wrapper_offset = -9, + + // Native frames + + native_frame_initial_param_offset = 2 + + }; + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and _pc: + intptr_t* _fp; // frame pointer + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. 
+ + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); + static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { + verify_deopt_original_pc(nm, unextended_sp, true); + } +#endif + + public: + // Constructors + + frame(intptr_t* sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* fp); + + void init(intptr_t* sp, intptr_t* fp, address pc); + + // accessors for the instance variables + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + + // return address of param, zero origin index. + inline address* native_param_addr(int idx) const; + + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + + // helper to update a map with callee-saved FP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + +#ifndef CC_INTERP + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* sp); +#endif // CC_INTERP + +#ifdef CC_INTERP + inline interpreterState get_interpreterState() const; +#endif // CC_INTERP + +#endif // CPU_MIPS_VM_FRAME_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp b/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp new file mode 100644 index 00000000000..60e56ac7aba --- /dev/null +++ b/hotspot/src/cpu/mips/vm/frame_mips.inline.hpp @@ -0,0 +1,312 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP + +#include "code/codeCache.hpp" + +// Inline functions for Loongson frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = (address)(sp[-1]); + + // Here's a sticky one. This constructor can be called via AsyncGetCallTrace + // when last_Java_sp is non-null but the pc fetched is junk. If we are truly + // unlucky the junk value could be to a zombied method and we'll die on the + // find_blob call. This is also why we can have no asserts on the validity + // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler + // -> pd_last_frame should use a specialized version of pd_last_frame which could + // call a specilaized frame constructor instead of this one. + // Then we could use the assert below. However this assert is of somewhat dubious + // value. + // assert(_pc != NULL, "no pc?"); + + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && unextended_sp() == other.unextended_sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
+inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Relationals on frames based +// Return true if the frame is younger (more recent activation) than the frame represented by id +inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() < id ; } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + + + +inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } +inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } + + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: + +inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } +inline address frame::sender_pc() const { return *sender_pc_addr(); } + +// return address of param, zero origin index. +inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); } + +#ifdef CC_INTERP + +inline interpreterState frame::get_interpreterState() const { + return ((interpreterState)addr_at( -sizeof(BytecodeInterpreter)/wordSize )); +} + +inline intptr_t* frame::sender_sp() const { + // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames? + if (is_interpreted_frame()) { + assert(false, "should never happen"); + return get_interpreterState()->sender_sp(); + } else { + return addr_at(sender_sp_offset); + } +} + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_locals); +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return (intptr_t*) &(get_interpreterState()->_bcp); +} + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_constants); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return &(get_interpreterState()->_method); +} + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + assert(is_interpreted_frame(), "must be interpreted"); + return (intptr_t*) &(get_interpreterState()->_mdx); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + assert(is_interpreted_frame(), "wrong frame type"); + return get_interpreterState()->_stack + 1; +} + +#else // asm interpreter +inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(interpreter_frame_locals_offset); +} + +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + return (intptr_t*)addr_at(interpreter_frame_bcx_offset); +} + + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + return (intptr_t*)addr_at(interpreter_frame_mdx_offset); +} + + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const 
{ + return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(interpreter_frame_method_offset); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL ) { + return sp(); + } else { + // sp() may have been extended by an adapter + assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +#endif // CC_INTERP + +inline int frame::pd_oop_map_offset_adjustment() const { + return 0; +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + + +inline jint frame::interpreter_frame_expression_stack_direction() { return -1; } + + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); +} + +// Compiled frames + +inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { + return (nof_args - local_index + (local_index < nof_args ? 1: -1)); +} + +inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) { + return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors); +} + +inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) { + return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1); +} + +inline bool frame::volatile_across_calls(Register reg) { + return true; +} + + + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*) map->location(V0->as_VMReg())); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*) map->location(V0->as_VMReg())) = obj; +} + +#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP diff --git a/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp b/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp new file mode 100644 index 00000000000..bd00a8d473d --- /dev/null +++ b/hotspot/src/cpu/mips/vm/globalDefinitions_mips.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +// Size of MIPS Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = (2*wordSize); + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are properly extended to 64 bits. +// If set, SharedRuntime::c_calling_convention() must adapt +// signatures accordingly. +const bool CCallingConventionRequiresIntsAsLongs = false; + +#define SUPPORTS_NATIVE_CX8 + +#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/globals_mips.hpp b/hotspot/src/cpu/mips/vm/globals_mips.hpp new file mode 100644 index 00000000000..988bc35137d --- /dev/null +++ b/hotspot/src/cpu/mips/vm/globals_mips.hpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP +#define CPU_MIPS_VM_GLOBALS_MIPS_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +#ifdef CORE +define_pd_global(bool, UseSSE, 0); +#endif /* CORE */ +define_pd_global(bool, ConvertSleepToYield, true); +define_pd_global(bool, ShareVtableStubs, true); +define_pd_global(bool, CountInterpCalls, true); + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast +define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + +// See 4827828 for this change. There is no globals_core_i486.hpp. I can't +// assign a different value for C2 without touching a number of files. Use +// #ifdef to minimize the change as it's late in Mantis. -- FIXME. 
+// c1 doesn't have this problem because the fix to 4858033 assures us +// the the vep is aligned at CodeEntryAlignment whereas c2 only aligns +// the uep and the vep doesn't get real alignment but just slops on by +// only assured that the entry instruction meets the 5 byte size requirement. +define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 4000); // MIPS generates 3x instructions than X86 + +define_pd_global(uintx, TLABSize, 0); +define_pd_global(uintx, NewSize, 1024 * K); +define_pd_global(intx, PreInflateSpin, 10); + +define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); +define_pd_global(intx, PrefetchScanIntervalInBytes, -1); +define_pd_global(intx, PrefetchFieldsAhead, -1); + +define_pd_global(intx, StackYellowPages, 2); +define_pd_global(intx, StackRedPages, 1); +define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); +define_pd_global(bool, UseMembar, true); +// GC Ergo Flags +define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, PreserveFramePointer, false); +// Only c2 cares about this at the moment +define_pd_global(intx, AllocatePrefetchStyle, 2); +define_pd_global(intx, AllocatePrefetchDistance, -1); + +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ + \ + product(bool, UseLEXT1, false, \ + "Use LoongISA general EXTensions 1") \ + \ + product(bool, UseLEXT2, false, \ + "Use LoongISA general EXTensions 2") \ + \ + product(bool, UseLEXT3, false, \ + "Use LoongISA general EXTensions 3") \ + \ + product(bool, UseCodeCacheAllocOpt, true, \ + "Allocate code cache within 32-bit memory address space") \ + \ + product(intx, UseSyncLevel, 10000, \ + "The sync level on Loongson CPUs" \ + "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ + "UseSyncLevel == 4000, 101, maybe for GS464V" \ + "UseSyncLevel == 3000, 001, maybe for GS464V" \ + "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ + "UseSyncLevel == 1000, 110, maybe for GS464") \ + \ + develop(bool, UseBoundCheckInstruction, false, \ + "Use bound check instruction") \ + \ + product(intx, SetFSFOFN, 999, \ + "Set the FS/FO/FN bits in FCSR" \ + "999 means FS/FO/FN will not be changed" \ + "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ + \ + /* assembler */ \ + product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ + "Use count leading zeros instruction") \ + \ + product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ + "Use count trailing zeros instruction") \ + \ + product(bool, UseActiveCoresMP, false, \ + "Eliminate barriers for single active cpu") + +#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp b/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp new file mode 100644 index 00000000000..96ea3453606 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/icBuffer_mips.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
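// Side note on the ARCH_FLAGS descriptions above: adjacent C string literals simply
// concatenate, so fragments that do not end with a space (or other separator) run
// together in the flag's doc string, e.g. "...in FCSR999 means..." for SetFSFOFN and
// "...Loongson CPUsUseSyncLevel == 10000..." for UseSyncLevel. A minimal, standalone
// demonstration of the literal-concatenation rule:
#include <cassert>
#include <cstring>

int main() {
  const char* joined = "Set the FS/FO/FN bits in FCSR"
                       "999 means FS/FO/FN will not be changed";
  const char* spaced = "Set the FS/FO/FN bits in FCSR. "
                       "999 means FS/FO/FN will not be changed";
  assert(std::strstr(joined, "FCSR999") != nullptr);   // fragments run together
  assert(std::strstr(spaced, "FCSR999") == nullptr);   // a separator keeps them apart
  return 0;
}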
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_mips.hpp" +#include "oops/oop.inline.hpp" +#include "oops/oop.inline2.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +int InlineCacheBuffer::ic_stub_code_size() { + return NativeMovConstReg::instruction_size + + NativeGeneralJump::instruction_size + + 1; + // so that code_end can be set in CodeBuffer + // 64bit 15 = 6 + 8 bytes + 1 byte + // 32bit 7 = 2 + 4 bytes + 1 byte +} + + +// we use T1 as cached oop(klass) now. this is the target of virtual call, +// when reach here, the receiver in T0 +// refer to shareRuntime_mips.cpp,gen_i2c2i_adapters +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // note: even though the code contains an embedded oop, we do not need reloc info + // because + // (1) the oop is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear +// assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop"); +#define __ masm-> + __ patchable_set48(T1, (long)cached_value); + + __ patchable_jump(entry_point); + __ flush(); +#undef __ +} + + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + return jump->jump_destination(); +} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // creation also verifies the object + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); + // Verifies the jump + NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address()); + void* o= (void*)move->data(); + return o; +} diff --git a/hotspot/src/cpu/mips/vm/icache_mips.cpp b/hotspot/src/cpu/mips/vm/icache_mips.cpp new file mode 100644 index 00000000000..848964b63f6 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/icache_mips.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" + +void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) +{ +#define __ _masm-> + StubCodeMark mark(this, "ICache", "flush_icache_stub"); + address start = __ pc(); + + __ jr_hb(RA); + __ delayed()->ori(V0, RA2, 0); + + *flush_icache_stub = (ICache::flush_icache_stub_t)start; +#undef __ +} diff --git a/hotspot/src/cpu/mips/vm/icache_mips.hpp b/hotspot/src/cpu/mips/vm/icache_mips.hpp new file mode 100644 index 00000000000..78ee11cc733 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/icache_mips.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP +#define CPU_MIPS_VM_ICACHE_MIPS_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. 
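// Context sketch, not HotSpot code: why an icache-flush hook exists at all. After
// code bytes are written, the instruction cache must be made coherent with the data
// side before the new code runs. Outside the VM the portable GCC/Clang primitive is
// __builtin___clear_cache; HotSpot instead emits its own flush stub above (ending in
// jr.hb, a jump-register with instruction-hazard barrier). publish_code is a
// hypothetical helper used only for illustration.
#include <cstddef>
#include <cstring>

void publish_code(void* code_area, const void* new_insns, std::size_t len) {
  std::memcpy(code_area, new_insns, len);          // write the new instructions
  char* p = static_cast<char*>(code_area);
  __builtin___clear_cache(p, p + len);             // flush [p, p+len) from the icache
}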
+ +class ICache : public AbstractICache { + public: + enum { + stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes + line_size = 32, // flush instruction affects a dword + log2_line_size = 5 // log2(line_size) + }; +}; + +#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp new file mode 100644 index 00000000000..ed2d931e94c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.cpp @@ -0,0 +1,2084 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interp_masm_mips_64.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of InterpreterMacroAssembler + +#ifdef CC_INTERP +void InterpreterMacroAssembler::get_method(Register reg) { +} +#endif // CC_INTERP + +void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { + // The runtime address of BCP may be unaligned. + // Refer to the SPARC implementation. + lbu(reg, BCP, offset+1); + lbu(tmp, BCP, offset); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); +} + +void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { + assert(reg != tmp, "need separate temp register"); + if (offset & 3) { // Offset unaligned? 
+ lbu(reg, BCP, offset+3); + lbu(tmp, BCP, offset+2); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); + lbu(tmp, BCP, offset+1); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); + lbu(tmp, BCP, offset); + dsll(reg, reg, 8); + daddu(reg, tmp, reg); + } else { + lwu(reg, BCP, offset); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) { + // interpreter specific + // + // Note: No need to save/restore bcp & locals (r13 & r14) pointer + // since these are callee saved registers and no blocking/ + // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has + // already saved them so that it can use BCP/LVP as temporaries + // then a save/restore here will DESTROY the copy the caller + // saved! There used to be a save_bcp() that only happened in + // the ASSERT path (no restore_bcp). Which caused bizarre failures + // when jvm built with ASSERTs. +#ifdef ASSERT + save_bcp(); + { + Label L; + ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT,R0,L); + delayed()->nop(); + stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); + bind(L); + } +#endif + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); + // interpreter specific + // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals + // but since they may not have been saved (and we don't want to + // save them here (see note above) the assert is invalid. +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway. Btw., bcp must be saved/restored since it may change + // due to GC. + assert(java_thread == noreg , "not expecting a precomputed java thread"); + save_bcp(); +#ifdef ASSERT + { + Label L; + ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); + beq(AT, R0, L); + delayed()->nop(); + stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); + // interpreter specific + restore_bcp(); + restore_locals(); +} + + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. If the flag has the popframe_processing bit set, it + // means that this code is called *during* popframe handling - we + // don't want to reenter. + // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. 
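// A portable sketch of what get_4_byte_integer_at_bcp above is doing: the bytecode
// pointer may be unaligned, so the 32-bit operand is assembled byte by byte instead
// of risking a word load that strict-alignment hardware would trap on. load_u4_native
// is a hypothetical helper; the memcpy fast path assumes a little-endian host,
// mirroring the MIPS64el lwu path.
#include <cstdint>
#include <cstring>

uint32_t load_u4_native(const unsigned char* p) {
  if (reinterpret_cast<uintptr_t>(p) & 3u) {               // offset unaligned?
    return  static_cast<uint32_t>(p[0])                    // lbu ... offset
         | (static_cast<uint32_t>(p[1]) << 8)              // lbu ... offset+1
         | (static_cast<uint32_t>(p[2]) << 16)             // lbu ... offset+2
         | (static_cast<uint32_t>(p[3]) << 24);            // lbu ... offset+3
  }
  uint32_t v;
  std::memcpy(&v, p, sizeof v);                            // aligned: plain 4-byte load (lwu)
  return v;
}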
+ // Not clear if any other register is available, so load AT twice + assert(AT != java_thread, "check"); + lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_pending_bit); + beq(AT, R0, L); + delayed()->nop(); + + lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); + andi(AT, AT, JavaThread::popframe_processing_bit); + bne(AT, R0, L); + delayed()->nop(); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + jr(V0); + delayed()->nop(); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + Register thread = T8; +#ifndef OPT_THREAD + get_thread(thread); +#else + move(T8, TREG); +#endif + ld_ptr(thread, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address tos_addr (thread, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + const Address oop_addr (thread, in_bytes(JvmtiThreadState::earlyret_oop_offset())); + const Address val_addr (thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + //V0, oop_addr,V1,val_addr + switch (state) { + case atos: + ld_ptr(V0, oop_addr); + st_ptr(R0, oop_addr); + verify_oop(V0, state); + break; + case ltos: + ld_ptr(V0, val_addr); // fall through + break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: + lw(V0, val_addr); + break; + case ftos: + lwc1(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case dtos: + ldc1(F0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); + break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + move(AT, (int)ilgl); + sw(AT, tos_addr); + sw(R0, thread, in_bytes(JvmtiThreadState::earlyret_value_offset())); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + Register tmp = T9; + + assert(java_thread != AT, "check"); + assert(java_thread != tmp, "check"); + ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + beq(AT, R0, L); + delayed()->nop(); + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); + move(tmp, JvmtiThreadState::earlyret_pending); + bne(tmp, AT, L); + delayed()->nop(); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. 
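// The lw/andi/beq pairs in check_and_handle_popframe above boil down to the flag
// test below: act only when the pop-frame request is pending and not already being
// processed. The constants are illustrative stand-ins for JavaThread's condition bits.
#include <cstdint>

constexpr uint32_t kPopFramePending    = 1u << 0;   // hypothetical bit positions
constexpr uint32_t kPopFrameProcessing = 1u << 1;

bool should_handle_popframe(uint32_t popframe_condition) {
  return (popframe_condition & kPopFramePending) != 0 &&
         (popframe_condition & kPopFrameProcessing) == 0;
}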
+ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); + lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); + move(A0, AT); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); + jr(V0); + delayed()->nop(); + bind(L); + } +} + + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, + int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + lbu(AT, BCP, bcp_offset); + lbu(reg, BCP, bcp_offset + 1); + ins(reg, AT, 8, 8); +} + + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + get_2_byte_integer_at_bcp(index, AT, bcp_offset); + } else if (index_size == sizeof(u4)) { + assert(EnableInvokeDynamic, "giant index used only for JSR 292"); + get_4_byte_integer_at_bcp(index, AT, bcp_offset); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + nor(index, index, R0); + sll(index, index, 0); + } else if (index_size == sizeof(u1)) { + lbu(index, BCP, bcp_offset); + } else { + ShouldNotReachHere(); + } +} + + +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, index); + get_cache_index_at_bcp(index, bcp_offset, index_size); + ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); + shl(index, 2); +} + + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. 
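// Sketch of the "~x" encoding that the nor(index, index, R0) above undoes: the
// 4-byte invokedynamic operand stores the bitwise complement of the pool-cache
// index, so decoding is a single NOT. Standalone model; the value 123 mirrors the
// assert on ConstantPool::decode_invokedynamic_index(~123) in the code above.
#include <cstdint>

constexpr uint32_t encode_indy_index(uint32_t idx) { return ~idx; }
constexpr uint32_t decode_indy_index(uint32_t enc) { return ~enc; }

static_assert(decode_indy_index(encode_indy_index(123u)) == 123u,
              "decoding the complemented index recovers the original index");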
+ dsll(AT, index, Address::times_ptr); + daddu(AT, cache, AT); + lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); + if(os::is_MP()) { + sync(); // load acquire + } + + const int shift_count = (1 + byte_no) * BitsPerByte; + assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || + (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), + "correct shift count"); + dsrl(bytecode, bytecode, shift_count); + assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); + move(AT, ConstantPoolCacheEntry::bytecode_1_mask); + andr(bytecode, bytecode, AT); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert(cache != tmp, "must use different register"); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + shl(tmp, 2 + LogBytesPerWord); + ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); + // skip past the header + daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + daddu(cache, cache, tmp); +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ld(mcs, method, in_bytes(Method::method_counters_offset())); + bne(mcs, R0, has_counters); + delayed()->nop(); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ld(mcs, method, in_bytes(Method::method_counters_offset())); + beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory + delayed()->nop(); + bind(has_counters); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index) { + assert_different_registers(result, index); + // convert from field index to resolved_references() index and from + // word index to byte offset. Since this is a java object, it can be compressed + Register tmp = index; // reuse + shl(tmp, LogBytesPerHeapOop); + + get_constant_pool(result); + // load pointer for resolved_references[] objArray + ld(result, result, ConstantPool::resolved_references_offset_in_bytes()); + // JNIHandles::resolve(obj); + ld(result, result, 0); //? is needed? + // Add in the index + daddu(result, result, tmp); + load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +// Resets LVP to locals. Register sub_klass cannot be any of the above. +void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { + assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); + assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); + assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); + // Profile the not-null value's klass. + // Here T9 and T1 are used as temporary registers. + profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 + + // Do the check. 
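// Generic model of the address arithmetic in load_resolved_reference_at_index above:
// scale the element index by the heap-oop size, add the object-array header, and the
// result is the slot to load from. header_bytes and log_elem_size are stand-ins for
// arrayOopDesc::base_offset_in_bytes(T_OBJECT) and LogBytesPerHeapOop.
#include <cstddef>
#include <cstdint>

uintptr_t resolved_reference_slot(uintptr_t array_base,
                                  std::size_t header_bytes,
                                  std::size_t index,
                                  unsigned    log_elem_size) {
  return array_base + header_bytes + (index << log_elem_size);
}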
+ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 + + // Profile the failure of the check. + profile_typecheck_failed(T9); // blows T9 +} + + + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + ld(r, SP, 0); + daddiu(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + lw(r, SP, 0); + daddiu(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_l(Register r) { + ld(r, SP, 0); + daddiu(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + lwc1(r, SP, 0); + daddiu(SP, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + ldc1(r, SP, 0); + daddiu(SP, SP, 2 * Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + daddiu(SP, SP, - Interpreter::stackElementSize); + sd(r, SP, 0); +} + +void InterpreterMacroAssembler::push_i(Register r) { + // For compatibility reason, don't change to sw. + daddiu(SP, SP, - Interpreter::stackElementSize); + sd(r, SP, 0); +} + +void InterpreterMacroAssembler::push_l(Register r) { + daddiu(SP, SP, -2 * Interpreter::stackElementSize); + sd(r, SP, 0); + sd(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + daddiu(SP, SP, - Interpreter::stackElementSize); + swc1(r, SP, 0); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + daddiu(SP, SP, -2 * Interpreter::stackElementSize); + sdc1(r, SP, 0); + sd(R0, SP, Interpreter::stackElementSize); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: pop_i(); break; + case ltos: pop_l(); break; + case ftos: pop_f(); break; + case dtos: pop_d(); break; + case vtos: /* nothing to do */ break; + default: ShouldNotReachHere(); + } + verify_oop(FSR, state); +} + +//FSR=V0,SSR=V1 +void InterpreterMacroAssembler::push(TosState state) { + verify_oop(FSR, state); + switch (state) { + case atos: push_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: push_f(); break; + case dtos: push_d(); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + + + +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ld(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + sd(val, SP, Interpreter::expr_offset_in_bytes(n)); +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { + // record last_sp + move(Rsender, SP); + sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + if (JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. +#ifndef OPT_THREAD + get_thread(temp); +#else + move(temp, TREG); +#endif + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? 
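// A simplified model (not the real interpreter stack) of the slot convention the
// push_/pop_ helpers above implement: ints and references take one stackElementSize
// slot, while longs and doubles take two, with the extra slot written as zero.
#include <cassert>
#include <cstdint>
#include <vector>

struct ToyOperandStack {
  std::vector<int64_t> slots;                                        // top of stack == back()
  void push_i(int32_t v) { slots.push_back(v); }
  void push_l(int64_t v) { slots.push_back(0); slots.push_back(v); } // two slots, filler first
  int32_t pop_i() { int32_t v = static_cast<int32_t>(slots.back()); slots.pop_back(); return v; }
  int64_t pop_l() { int64_t v = slots.back(); slots.pop_back(); slots.pop_back(); return v; }
};

int main() {
  ToyOperandStack s;
  s.push_i(7);
  s.push_l(1LL << 40);
  assert(s.pop_l() == (1LL << 40));
  assert(s.pop_i() == 7);
  return 0;
}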
+ lw(AT, temp, in_bytes(JavaThread::interp_only_mode_offset())); + beq(AT, R0, run_compiled_code); + delayed()->nop(); + ld(AT, method, in_bytes(Method::interpreter_entry_offset())); + jr(AT); + delayed()->nop(); + bind(run_compiled_code); + } + + ld(AT, method, in_bytes(Method::from_interpreted_offset())); + jr(AT); + delayed()->nop(); +} + + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. mips64 does not do this. +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { + // Nothing mips64 specific to be done here +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +// assume the next bytecode in T8. +void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop) { + if (VerifyActivationFrameSize) { + Label L; + + dsubu(T2, FP, SP); + int min_frame_size = (frame::link_offset - + frame::interpreter_frame_initial_sp_offset) * wordSize; + daddiu(T2, T2,- min_frame_size); + bgez(T2, L); + delayed()->nop(); + stop("broken stack frame"); + bind(L); + } + // FIXME: I do not know which register should pass to verify_oop + if (verifyoop) verify_oop(FSR, state); + dsll(T2, Rnext, LogBytesPerWord); + + if((long)table >= (long)Interpreter::dispatch_table(btos) && + (long)table <= (long)Interpreter::dispatch_table(vtos) + ) { + int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); + int table_offset = ((int)state - (int)itos) * table_size; + + // GP points to the starting address of Interpreter::dispatch_table(itos). + // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. + if(table_offset != 0) { + daddiu(T3, GP, table_offset); + if (UseLEXT1) { + gsldx(T3, T2, T3, 0); + } else { + daddu(T3, T2, T3); + ld(T3, T3, 0); + } + } else { + if (UseLEXT1) { + gsldx(T3, T2, GP, 0); + } else { + daddu(T3, T2, GP); + ld(T3, T3, 0); + } + } + } else { + li(T3, (long)table); + if (UseLEXT1) { + gsldx(T3, T2, T3, 0); + } else { + daddu(T3, T2, T3); + ld(T3, T3, 0); + } + } + jr(T3); + delayed()->nop(); +} + +void InterpreterMacroAssembler::dispatch_only(TosState state) { + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { + dispatch_base(state, Interpreter::normal_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { + dispatch_base(state, Interpreter::normal_table(state), false); +} + + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { + // load next bytecode (load before advancing r13 to prevent AGI) + lbu(Rnext, BCP, step); + increment(BCP, step); + dispatch_base(state, Interpreter::dispatch_table(state)); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + lbu(Rnext, BCP, 0); + dispatch_base(state, table); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from syncronized blocks. +// Remove the activation from the stack. 
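// Compact model of dispatch_base above: the next bytecode indexes a per-TosState
// table of code addresses (bytecode << LogBytesPerWord in the generated code) and
// control transfers through the loaded entry. Handlers here are ordinary function
// pointers; 0x60 really is the JVM iadd opcode, the rest is illustrative.
#include <cstdio>

using Handler = void (*)();

static void do_nop()  { std::puts("nop");  }
static void do_iadd() { std::puts("iadd"); }

int main() {
  Handler dispatch_table[256] = { do_nop };   // index 0 = nop; remaining entries start null
  dispatch_table[0x60] = do_iadd;
  unsigned char next_bytecode = 0x60;         // plays the role of Rnext
  dispatch_table[next_bytecode]();            // jr T3 in the generated code
  return 0;
}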
+// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +// used registers : T1, T2, T3, T8 +// T1 : thread, method access flags +// T2 : monitor entry pointer +// T3 : method, monitor top +// T8 : unlock flag +void InterpreterMacroAssembler::remove_activation( + TosState state, + Register ret_addr, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { + // Note: Registers V0, V1 and F0, F1 may be in use for the result + // check if synchronized method + Label unlocked, unlock, no_unlock; + + // get the value of _do_not_unlock_if_synchronized into T8 +#ifndef OPT_THREAD + Register thread = T1; + get_thread(thread); +#else + Register thread = TREG; +#endif + lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + // reset the flag + sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + // get method access flags + ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); + lw(T1, T3, in_bytes(Method::access_flags_offset())); + andi(T1, T1, JVM_ACC_SYNCHRONIZED); + beq(T1, R0, unlocked); + delayed()->nop(); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. + bne(T8, R0, no_unlock); + delayed()->nop(); + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. + daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize + - (int)sizeof(BasicObjectLock)); + // address of first monitor + ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, unlock); + delayed()->nop(); + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + // I think mips do not need empty_FPU_stack + // remove possible return value from FPU-stack, otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. 
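// Control-flow sketch of the synchronized-method epilogue being generated above: if
// the method is synchronized and the do-not-unlock flag is clear, the first monitor
// entry must still hold the receiver; if it was already unlocked by an explicit
// monitorexit, an IllegalMonitorStateException is thrown or installed. The enum and
// struct names below are illustrative, not HotSpot's.
struct ToyMonitorEntry { void* obj; };        // non-null obj == still locked

enum class EpilogueAction { nothing, unlock_first_monitor, raise_illegal_monitor_state };

EpilogueAction synchronized_epilogue(bool is_synchronized,
                                     bool do_not_unlock_if_synchronized,
                                     const ToyMonitorEntry* first_monitor) {
  if (!is_synchronized || do_not_unlock_if_synchronized)
    return EpilogueAction::nothing;                          // branch to unlocked / no_unlock
  if (first_monitor->obj != nullptr)
    return EpilogueAction::unlock_first_monitor;             // normal case: unlock the receiver
  return EpilogueAction::raise_illegal_monitor_state;        // already unlocked: error path
}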
+ if (install_monitor_exception) { + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + + } + + b(unlocked); + delayed()->nop(); + } + + bind(unlock); + unlock_object(c_rarg0); + pop(state); + + // Check that for block-structured locking (i.e., that all locked + // objects has been unlocked) + bind(unlocked); + + // V0, V1: Might contain return value + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top(FP, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + + bind(restart); + // points to current entry, starting with top-most entry + ld(c_rarg0, monitor_block_top); + // points to word before bottom of monitor block + daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + b(entry); + delayed()->nop(); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + // remove possible return value from FPU-stack, + // otherwise stack could overflow + empty_FPU_stack(); + MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception + // Unlock does not block, so don't have to worry about the frame + // We don't have to preserve c_rarg0, since we are going to + // throw an exception + + push(state); + unlock_object(c_rarg0); + pop(state); + + if (install_monitor_exception) { + empty_FPU_stack(); + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + + b(restart); + delayed()->nop(); + } + + bind(loop); + ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + bne(T1, R0, exception);// check if current entry is used + delayed()->nop(); + + daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry + bind(entry); + bne(c_rarg0, T3, loop); // check if bottom reached + delayed()->nop(); // if not at bottom then check this entry + } + + bind(no_unlock); + + // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation + ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); + ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); +} + +#endif // C_INTERP + +// Lock object +// +// Args: +// c_rarg1: BasicObjectLock to be used for locking +// +// Kills: +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) +// rscratch1, rscratch2 (scratch regs) +void InterpreterMacroAssembler::lock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } else { + Label done; + + const Register swap_reg = T2; // Must use T2 for cmpxchg instruction + const Register obj_reg = T1; // Will contain the oop + + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + + BasicLock::displaced_header_offset_in_bytes(); + + Label slow_case; + + // Load object pointer into obj_reg %T1 + ld(obj_reg, lock_reg, obj_offset); + + if (UseBiasedLocking) { + // Note: we use noreg for the temporary register since it's hard + // to come up with a free register on all incoming code paths + biased_locking_enter(lock_reg, obj_reg, swap_reg, noreg, false, done, &slow_case); + } + + + // Load (object->mark() | 1) into swap_reg %T2 + ld(AT, obj_reg, 0); + ori(swap_reg, AT, 1); + + + // Save (object->mark() | 1) into BasicLock's displaced header + sd(swap_reg, lock_reg, mark_offset); + + assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); + //if (os::is_MP()) { + // lock(); + //} + cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); + + if (PrintBiasedLockingStatistics) { + Label L; + beq(AT, R0, L); + delayed()->nop(); + push(T0); + push(T1); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); + pop(T1); + pop(T0); + bind(L); + } + + bne(AT, R0, done); + delayed()->nop(); + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) SP <= mark < SP + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %T2 as the result of cmpxchg + + dsubu(swap_reg, swap_reg, SP); + move(AT, 3 - os::vm_page_size()); + andr(swap_reg, swap_reg, AT); + // Save the test result, for recursive case, the result is zero + sd(swap_reg, lock_reg, mark_offset); + if (PrintBiasedLockingStatistics) { + Label L; + bne(swap_reg, R0, L); + delayed()->nop(); + push(T0); + push(T1); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); + pop(T1); + pop(T0); + bind(L); + } + + beq(swap_reg, R0, done); + delayed()->nop(); + bind(slow_case); + // Call the runtime routine for slow case + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); + + bind(done); + } +} + + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg1: BasicObjectLock for lock +// +// Kills: +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) +// rscratch1, rscratch2 (scratch regs) +// Argument: T6 : Points to BasicObjectLock structure for lock +// Argument: c_rarg0 : Points to BasicObjectLock structure for lock +// Throw an IllegalMonitorException if object is not locked by current thread +void InterpreterMacroAssembler::unlock_object(Register lock_reg) { + assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); + } else { + Label done; + + const Register swap_reg = T2; // Must use T2 for cmpxchg instruction + const Register header_reg = T3; // Will contain the old oopMark + const Register obj_reg = T1; // Will contain the oop + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock structure + // Store the BasicLock address into %T2 + daddiu(swap_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); + + // Load oop into obj_reg(%T1) + ld(obj_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes ()); + //free entry + sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } + + // Load the old header from BasicLock structure + ld(header_reg, swap_reg, BasicLock::displaced_header_offset_in_bytes()); + // zero for recursive case + beq(header_reg, R0, done); + delayed()->nop(); + + // Atomic swap back the old header + if (os::is_MP()); //lock(); + cmpxchg(header_reg, Address(obj_reg, 0), swap_reg); + + // zero for recursive case + bne(AT, R0, done); + delayed()->nop(); + + // Call the runtime routine for slow case. + sd(obj_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj + call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); + + bind(done); + + restore_bcp(); + } +} + +#ifndef CC_INTERP + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ld(mdp, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); + beq(mdp, R0, zero_continue); + delayed()->nop(); +} + + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + + // V0 and T0 will be used as two temporary registers. + push2(V0, T0); + + get_method(T0); + // Test MDO to avoid the call if it is NULL. + ld(V0, T0, in_bytes(Method::method_data_offset())); + beq(V0, R0, set_mdp); + delayed()->nop(); + + // method: T0 + // bcp: BCP --> S0 + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); + // mdi: V0 + // mdo is guaranteed to be non-zero here, we checked for it before the call. + get_method(T0); + ld(T0, T0, in_bytes(Method::method_data_offset())); + daddiu(T0, T0, in_bytes(MethodData::data_offset())); + daddu(V0, T0, V0); + bind(set_mdp); + sd(V0, FP, frame::interpreter_frame_mdx_offset * wordSize); + pop2(V0, T0); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + Register method = V0; + Register mdp = V1; + Register tmp = A0; + push(method); + push(mdp); + push(tmp); + test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue + get_method(method); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
+ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); + ld(AT, method, in_bytes(Method::const_offset())); + daddu(tmp, tmp, AT); + daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); + beq(tmp, BCP, verify_continue); + delayed()->nop(); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); + bind(verify_continue); + pop(tmp); + pop(mdp); + pop(method); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + sd(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + // Counter address + Address data(mdp_in, constant); + + increment_mdp_data_at(data, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Address data, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + Register tmp = S0; + push(tmp); + if (decrement) { + // Decrement the register. + ld(AT, data); + daddiu(tmp, AT, (int32_t) -DataLayout::counter_increment); + // If the decrement causes the counter to overflow, stay negative + Label L; + slt(AT, tmp, R0); + bne(AT, R0, L); + delayed()->nop(); + daddiu(tmp, tmp, (int32_t) DataLayout::counter_increment); + bind(L); + sd(tmp, data); + } else { + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + ld(AT, data); + // Increment the register. + daddiu(tmp, AT, DataLayout::counter_increment); + // If the increment causes the counter to overflow, pull back by 1. + slt(AT, tmp, R0); + dsubu(tmp, tmp, AT); + sd(tmp, data); + } + pop(tmp); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) { + Register tmp = S0; + push(S0); + if (decrement) { + // Decrement the register. + daddu(AT, mdp_in, reg); + assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); + ld(AT, AT, constant); + + daddiu(tmp, AT, (int32_t) -DataLayout::counter_increment); + // If the decrement causes the counter to overflow, stay negative + Label L; + slt(AT, tmp, R0); + bne(AT, R0, L); + delayed()->nop(); + daddiu(tmp, tmp, (int32_t) DataLayout::counter_increment); + bind(L); + + daddu(AT, mdp_in, reg); + sd(tmp, AT, constant); + } else { + daddu(AT, mdp_in, reg); + assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); + ld(AT, AT, constant); + + // Increment the register. + daddiu(tmp, AT, DataLayout::counter_increment); + // If the increment causes the counter to overflow, pull back by 1. + slt(AT, tmp, R0); + dsubu(tmp, tmp, AT); + + daddu(AT, mdp_in, reg); + sd(tmp, AT, constant); + } + pop(S0); +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int header_offset = in_bytes(DataLayout::header_offset()); + int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); + // Set the flag + lw(AT, Address(mdp_in, header_offset)); + if(Assembler::is_simm16(header_bits)) { + ori(AT, AT, header_bits); + } else { + push(T8); + // T8 is used as a temporary register. 
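// The slt/dsubu idiom in increment_mdp_data_at above is a branch-free saturating
// increment: bump the 64-bit counter, then subtract the 1 back if the result went
// negative, i.e. the increment overflowed the signed range. Modeled on uint64_t so
// the wrap-around itself is well defined; saturating_increment(0x7fffffffffffffff)
// returns 0x7fffffffffffffff.
#include <cstdint>

uint64_t saturating_increment(uint64_t counter) {
  uint64_t bumped = counter + 1;        // daddiu tmp, AT, DataLayout::counter_increment (== 1)
  bumped -= (bumped >> 63);             // slt AT, tmp, R0 ; dsubu tmp, tmp, AT
  return bumped;
}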
+ move(T8, header_bits); + orr(AT, AT, T8); + pop(T8); + } + sw(AT, Address(mdp_in, header_offset)); +} + + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + ld(AT, Address(mdp_in, offset)); + bne(AT, value, not_equal_continue); + delayed()->nop(); + } else { + // Put the test value into a register, so caller can use it: + ld(test_value_out, Address(mdp_in, offset)); + bne(value, test_value_out, not_equal_continue); + delayed()->nop(); + } +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); + ld(AT, mdp_in, offset_of_disp); + daddu(mdp_in, mdp_in, AT); + sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + daddu(AT, reg, mdp_in); + assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); + ld(AT, AT, offset_of_disp); + daddu(mdp_in, mdp_in, AT); + sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if(Assembler::is_simm16(constant)) { + daddiu(mdp_in, mdp_in, constant); + } else { + move(AT, constant); + daddu(mdp_in, mdp_in, AT); + } + sd(mdp_in, Address(FP, frame::interpreter_frame_mdx_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + push(return_bci); // save/restore across call_VM + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + pop(return_bci); +} + + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. + // We inline increment_mdp_data_at to return bumped_count in a register + //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); + ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + push(T8); + // T8 is used as a temporary register. + daddiu(T8, bumped_count, DataLayout::counter_increment); + slt(AT, T8, R0); + dsubu(bumped_count, T8, AT); + pop(T8); + sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. 
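+ // Unlike the taken case above, the bumped value is not needed afterwards, so the generic increment helper is sufficient.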
+ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + bne(receiver, R0, not_null); + delayed()->nop(); + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + beq(R0, R0, skip_receiver_profile); + delayed()->nop(); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). +// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) +// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } + return; + } + + int last_row = VirtualCallData::row_limit() - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the receiver and for null. 
+ // Take any of three different outcomes: + // 1. found receiver => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the receiver is receiver[n]. + int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); + test_mdp_data_at(mdp, recvr_offset, receiver, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the receiver from the CallData.) + + // The receiver is receiver[n]. Increment count[n]. + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); + increment_mdp_data_at(mdp, count_offset); + beq(R0, R0, done); + delayed()->nop(); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on receiver[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (is_virtual_call) { + beq(reg2, R0, found_null); + delayed()->nop(); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + beq(R0, R0, done); + delayed()->nop(); + bind(found_null); + } else { + bne(reg2, R0, done); + delayed()->nop(); + } + break; + } + // Since null is rare, make it be the branch-taken case. + beq(reg2, R0, found_null); + delayed()->nop(); + + // Put all the "Case 3" tests here. + record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); + + // Found a null. Keep searching for a matching receiver, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching receiver, but we + // observed the receiver[start_row] is NULL. + + // Fill in the receiver field and increment the count. 
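+ // Install the new receiver in row[start_row] and seed its count with DataLayout::counter_increment (i.e. 1).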
+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); + set_mdp_data_at(mdp, recvr_offset, receiver); + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); + move(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + beq(R0, R0, done); + delayed()->nop(); + } +} + +// Example state machine code for three profile rows: +// // main copy of decision tree, rooted at row[1] +// if (row[0].rec == rec) { row[0].incr(); goto done; } +// if (row[0].rec != NULL) { +// // inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[1].rec != NULL) { +// // degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// if (row[2].rec != NULL) { goto done; } // overflow +// row[2].init(rec); goto done; +// } else { +// // remember row[1] is empty +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[1].init(rec); goto done; +// } +// } else { +// // remember row[0] is empty +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[0].init(rec); goto done; +// } +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); + + bind (done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, + Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + beq(R0, R0, profile_continue); + delayed()->nop(); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. 
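+ // profile_typecheck has already advanced mdp past the VirtualCallData, so the counter is reached at a negative offset from mdp.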
+ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + move(reg2, in_bytes(MultiBranchData::per_case_size())); + if (UseLEXT1) { + gsdmult(index, index, reg2); + } else { + dmult(index, reg2); + mflo(index); + } + daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::narrow(Register result) { + + // Get method->_constMethod->_result_type + ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); + ld(T9, T9, in_bytes(Method::const_offset())); + lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); + + Label done, notBool, notByte, notChar; + + // common case first + addiu(AT, T9, -T_INT); + beq(AT, R0, done); + delayed()->nop(); + + // mask integer result to narrower return type. 
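+ // T_BOOLEAN: canonicalize the result by keeping only the low bit.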
+ addiu(AT, T9, -T_BOOLEAN); + bne(AT, R0, notBool); + delayed()->nop(); + andi(result, result, 0x1); + beq(R0, R0, done); + delayed()->nop(); + + bind(notBool); + addiu(AT, T9, -T_BYTE); + bne(AT, R0, notByte); + delayed()->nop(); + seb(result, result); + beq(R0, R0, done); + delayed()->nop(); + + bind(notByte); + addiu(AT, T9, -T_CHAR); + bne(AT, R0, notChar); + delayed()->nop(); + andi(result, result, 0xFFFF); + beq(R0, R0, done); + delayed()->nop(); + + bind(notChar); + seh(result, result); + + // Nothing to do for T_INT + bind(done); +} + + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { + Label update, next, none; + + verify_oop(obj); + + if (mdo_addr.index() != noreg) { + guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); + guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); + push(T0); + dsll(T0, mdo_addr.index(), mdo_addr.scale()); + daddu(T0, T0, mdo_addr.base()); + } + + bne(obj, R0, update); + delayed()->nop(); + + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::null_seen); + if (mdo_addr.index() == noreg) { + sd(AT, mdo_addr); + } else { + sd(AT, T0, mdo_addr.disp()); + } + + beq(R0, R0, next); + delayed()->nop(); + + bind(update); + load_klass(obj, obj); + + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + + assert(TypeEntries::type_klass_mask == -4, "must be"); + dextm(AT, obj, 2, 62); + beq(AT, R0, next); + delayed()->nop(); + + andi(AT, obj, TypeEntries::type_unknown); + bne(AT, R0, next); + delayed()->nop(); + + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + beq(AT, R0, none); + delayed()->nop(); + + daddiu(AT, AT, -(TypeEntries::null_seen)); + beq(AT, R0, none); + delayed()->nop(); + + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + xorr(obj, obj, AT); + assert(TypeEntries::type_klass_mask == -4, "must be"); + dextm(AT, obj, 2, 62); + beq(AT, R0, next); + delayed()->nop(); + + // different than before. Cannot keep accurate profile. + if (mdo_addr.index() == noreg) { + ld(AT, mdo_addr); + } else { + ld(AT, T0, mdo_addr.disp()); + } + ori(AT, AT, TypeEntries::type_unknown); + if (mdo_addr.index() == noreg) { + sd(AT, mdo_addr); + } else { + sd(AT, T0, mdo_addr.disp()); + } + beq(R0, R0, next); + delayed()->nop(); + + bind(none); + // first time here. Set profile type. + if (mdo_addr.index() == noreg) { + sd(obj, mdo_addr); + } else { + sd(obj, T0, mdo_addr.disp()); + } + + bind(next); + if (mdo_addr.index() != noreg) { + pop(T0); + } +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); + li(tmp, is_virtual ? 
DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + bne(tmp, AT, profile_continue); + delayed()->nop(); + + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + if (Assembler::is_simm16(off_to_args)) { + daddiu(mdp, mdp, off_to_args); + } else { + move(AT, off_to_args); + daddu(mdp, mdp, AT); + } + + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { + addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); + } else { + li(AT, i*TypeStackSlotEntries::per_arg_count()); + subu32(tmp, tmp, AT); + } + + li(AT, TypeStackSlotEntries::per_arg_count()); + slt(AT, tmp, AT); + bne(AT, R0, done); + delayed()->nop(); + } + ld(tmp, callee, in_bytes(Method::const_offset())); + + lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); + + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); + subu(tmp, tmp, AT); + + addiu32(tmp, tmp, -1); + + Address arg_addr = argument_address(tmp); + ld(tmp, arg_addr); + + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp, mdo_arg_addr); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + if (Assembler::is_simm16(to_add)) { + daddiu(mdp, mdp, to_add); + } else { + move(AT, to_add); + daddu(mdp, mdp, AT); + } + + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); + + int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); + if (Assembler::is_simm16(-1 * tmp_arg_counts)) { + addiu32(tmp, tmp, -1 * tmp_arg_counts); + } else { + move(AT, tmp_arg_counts); + subu32(mdp, mdp, AT); + } + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. + assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + sll(tmp, tmp, exact_log2(DataLayout::cell_size)); + daddu(mdp, mdp, tmp); + } + sd(mdp, FP, frame::interpreter_frame_mdx_offset * wordSize); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, _bcp_register); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. 
We can't go back to the + // beginning of the ProfileData we intend to update to check its + // type because we're right after it and we don't know its + // length + Label do_profile; + lb(tmp, _bcp_register, 0); + daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); + beq(AT, R0, do_profile); + delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); + beq(AT, R0, do_profile); + delayed()->nop(); + + get_method(tmp); + lb(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); + li(AT, vmIntrinsics::_compiledLambdaForm); + bne(tmp, AT, profile_continue); + delayed()->nop(); + + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + daddu(tmp, ret, R0); + profile_obj_type(tmp, mdo_ret_addr); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { + guarantee(T9 == tmp1, "You are required to use T9 as the index register for MIPS!"); + + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); + bltz(tmp1, profile_continue); + delayed()->nop(); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. + // mdo start + parameters offset + array length - 1 + daddu(mdp, mdp, tmp1); + ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); + Address arg_type(mdp, tmp1, per_arg_scale, type_base); + + // load offset on the stack from the slot for this parameter + dsll(AT, tmp1, per_arg_scale); + daddu(AT, AT, mdp); + ld(tmp2, AT, off_base); + + subu(tmp2, R0, tmp2); + + // read the parameter from the local area + dsll(AT, tmp2, Interpreter::stackElementScale()); + daddu(AT, AT, _locals_register); + ld(tmp2, AT, 0); + + // profile the parameter + profile_obj_type(tmp2, arg_type); + + // go to next parameter + decrement(tmp1, TypeStackSlotEntries::per_arg_count()); + bgtz(tmp1, loop); + delayed()->nop(); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { + if (state == atos) { + MacroAssembler::verify_oop(reg); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { +} +#endif // !CC_INTERP + + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent.
+ Register tempreg = T0; +#ifndef OPT_THREAD + get_thread(T8); +#else + move(T8, TREG); +#endif + if (JvmtiExport::can_post_interpreter_events()) { + Label L; + lw(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, L); + delayed()->nop(); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + + { + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + //Rthread, + T8, + //Rmethod); + S3); + } + +} + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) { + Register tempreg = T0; +#ifndef OPT_THREAD + get_thread(T8); +#else + move(T8, TREG); +#endif + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label skip; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + // For c++ interpreter the result is always stored at a known location in the frame + // template interpreter will leave it on the top of the stack. + NOT_CC_INTERP(push(state);) + lw(tempreg, T8, in_bytes(JavaThread::interp_only_mode_offset())); + beq(tempreg, R0, skip); + delayed()->nop(); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(skip); + NOT_CC_INTERP(pop(state)); + } + + { + // Dtrace notification + SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); + NOT_CC_INTERP(push(state);) + get_method(S3); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + //Rthread, Rmethod); + T8, S3); + NOT_CC_INTERP(pop(state)); + } +} + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where) { + assert_different_registers(scratch, AT); + + if (!preloaded) { + lw(scratch, counter_addr); + } + addiu32(scratch, scratch, increment); + sw(scratch, counter_addr); + + move(AT, mask); + andr(scratch, scratch, AT); + + if (cond == Assembler::zero) { + beq(scratch, R0, *where); + delayed()->nop(); + } else { + unimplemented(); + } +} diff --git a/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp new file mode 100644 index 00000000000..a2ebdec3adb --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interp_masm_mips_64.hpp @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP + +#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assembler with interpreter-specific macros + + +class InterpreterMacroAssembler: public MacroAssembler { +#ifndef CC_INTERP + private: + + Register _locals_register; // register that contains the pointer to the locals + Register _bcp_register; // register that contains the bcp + + protected: + // Interpreter specific version of call_VM_base + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments); + + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, bool verifyoop = true); +#endif // CC_INTERP + + public: + // narrow int return value + void narrow(Register result); + + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} + + void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); + void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); + + void load_earlyret_value(TosState state); + +#ifdef CC_INTERP + void save_bcp() { /* not needed in c++ interpreter and harmless */ } + void restore_bcp() { /* not needed in c++ interpreter and harmless */ } + + // Helpers for runtime call arguments/results + void get_method(Register reg); + +#else + + // Interpreter-specific registers + void save_bcp() { + sd(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); + } + + void restore_bcp() { + ld(BCP, FP, frame::interpreter_frame_bcx_offset * wordSize); + } + + void restore_locals() { + ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); + } + + // Helpers for runtime call arguments/results + void get_method(Register reg) { + ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); + } + + void get_const(Register reg){ + get_method(reg); + ld(reg, reg, in_bytes(Method::const_offset())); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ld(reg, reg, in_bytes(ConstMethod::constants_offset())); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ld(reg, reg, ConstantPool::cache_offset_in_bytes()); + } + + void get_cpool_and_tags(Register cpool, Register tags) { + get_constant_pool(cpool); + ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2));
+ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_method_counters(Register method, Register mcs, Label& skip); + + // load cpool->resolved_references(index); + void load_resolved_reference_at_index(Register result, Register index); + + void pop_ptr( Register r = FSR); + void pop_i( Register r = FSR); + void pop_l( Register r = FSR); + void pop_f(FloatRegister r = FSF); + void pop_d(FloatRegister r = FSF); + + void push_ptr( Register r = FSR); + void push_i( Register r = FSR); + void push_l( Register r = FSR); + void push_f(FloatRegister r = FSF); + void push_d(FloatRegister r = FSF); + + void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } + + void push(Register r ) { ((MacroAssembler*)this)->push(r); } + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + + void empty_expression_stack() { + ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // NULL last_sp until next java call + sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + } + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. + //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + void dispatch_only(TosState state); + void dispatch_only_normal(TosState state); + void dispatch_only_noverify(TosState state); + void dispatch_next(TosState state, int step = 0); + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method, Register temp); + + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. This code is also used for + // exception unwinding. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. + // In earlyReturn case we only want to skip throwing an exception + // and installing an exception.
+ void remove_activation(TosState state, Register ret_addr, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); +#endif // CC_INTERP + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + +#ifndef CC_INTERP + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, int mask, + Register scratch, bool preloaded, + Condition cond, Label* where); + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register scratch2, + bool receiver_can_be_null = false); + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register scratch); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register scratch2); + + // Debugging + // only if +VerifyOops && state == atos + void verify_oop(Register reg, TosState state = atos); + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + + void profile_obj_type(Register obj, const Address& mdo_addr); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); +#endif // !CC_INTERP + + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); +}; + +#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP diff --git a/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp new file mode 100644 index 00000000000..26fced492a8 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interpreterGenerator_mips.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP +#define CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP + + +// Generation of Interpreter +// + friend class AbstractInterpreterGenerator; + + private: + + address generate_normal_entry(bool synchronized); + address generate_native_entry(bool synchronized); + address generate_abstract_entry(void); + address generate_math_entry(AbstractInterpreter::MethodKind kind); + address generate_empty_entry(void); + address generate_accessor_entry(void); + address generate_Reference_get_entry(); + void lock_method(void); + void generate_stack_overflow_check(void); + + void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); + void generate_counter_overflow(Label* do_continue); + +#endif // CPU_MIPS_VM_INTERPRETERGENERATOR_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp b/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp new file mode 100644 index 00000000000..8dec2007c6b --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interpreterRT_mips.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP + +#include "memory/allocation.hpp" + +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + + void move(int from_offset, int to_offset); + + void box(int from_offset, int to_offset); + void pass_int(); + void pass_long(); + void pass_object(); + void pass_float(); + void pass_double(); + + public: + // Creation + SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { + _masm = new MacroAssembler(buffer); + } + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp b/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp new file mode 100644 index 00000000000..14b7e39af76 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interpreterRT_mips_64.cpp @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.inline.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of SignatureHandlerGenerator + +void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { + __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); + __ sd(temp(), to(), to_offset * longSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { + __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); + __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); + + Label L; + __ bne(AT, R0, L); + __ delayed()->nop(); + __ move(temp(), R0); + __ bind(L); + __ sw(temp(), to(), to_offset * wordSize); +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // generate code to handle arguments + iterate(fingerprint); + // return result handler + __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); + // return + __ jr(RA); + __ delayed()->nop(); + + __ flush(); +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); + __ sw(temp(), jni_arg.as_caller_address()); + } +} + +// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ sd(temp(), jni_arg.as_caller_address()); + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + Argument jni_arg(jni_offset()); + + // the handle for a receiver will never be null + bool do_NULL_check = offset() != 0 || is_static(); + if (do_NULL_check) { + __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); + __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); + __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); + } else { + __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); + } + + if (!jni_arg.is_Register()) + __ sd(temp(), jni_arg.as_caller_address()); +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); + } else { + __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); + __ sw(temp(), jni_arg.as_caller_address()); + } +} + +// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + Argument jni_arg(jni_offset()); + if(jni_arg.is_Register()) { + __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + } else { + __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); + __ sd(temp(), jni_arg.as_caller_address()); + } +} + + +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +class SlowSignatureHandler + : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _reg_args; + intptr_t* _fp_identifiers; + unsigned int _num_args; + + virtual void pass_int() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_args < Argument::n_register_parameters) { + *_reg_args++ = from_obj; + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_long() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2 * Interpreter::stackElementSize; + + if (_num_args < Argument::n_register_parameters) { + *_reg_args++ = from_obj; + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_object() + { + intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + if (_num_args < Argument::n_register_parameters) { + *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + _num_args++; + } else { + *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; + } + } + + virtual void pass_float() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_args < Argument::n_float_register_parameters) { + *_reg_args++ = from_obj; + *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float + _num_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_double() + { + intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_args < Argument::n_float_register_parameters) { + *_reg_args++ = from_obj; + *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double + _num_args++; + } else { + *_to++ = from_obj; + } + } + + public: + SlowSignatureHandler(methodHandle method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + + // see TemplateInterpreterGenerator::generate_slow_signature_handler() + _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; + _fp_identifiers = to - 1; + *(int*) _fp_identifiers = 0; + _num_args = jni_offset(); + } +}; + + +IRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +IRT_END diff --git a/hotspot/src/cpu/mips/vm/interpreter_mips.hpp b/hotspot/src/cpu/mips/vm/interpreter_mips.hpp new file mode 100644 index 00000000000..9a21d704fa3 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interpreter_mips.hpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_INTERPRETER_MIPS_HPP +#define CPU_MIPS_VM_INTERPRETER_MIPS_HPP + + public: + + // Sentinel placed in the code for interpreter returns so + // that i2c adapters and osr code can recognize an interpreter + // return address and convert the return to a specialized + // block of code to handle compiedl return values and cleaning + // the fpu stack. 
+ static const int return_sentinel; + + static Address::ScaleFactor stackElementScale() { + return Address::times_8; + } + + // Offset from sp (which points to the last stack element) + static int expr_offset_in_bytes(int i) { return stackElementSize * i; } + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreterSize to get the VM to print out the size. + // Max size with JVMTI and TaggedStackInterpreter + const static int InterpreterCodeSize = 168 * 1024; +#endif // CPU_MIPS_VM_INTERPRETER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp b/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp new file mode 100644 index 00000000000..014c8127131 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/interpreter_mips_64.cpp @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + + +address AbstractInterpreterGenerator::generate_slow_signature_handler() { + address entry = __ pc(); + + // Rmethod: method + // LVP: pointer to locals + // A3: first stack arg + __ move(A3, SP); + __ daddiu(SP, SP, -10 * wordSize); + __ sd(RA, SP, 0); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::slow_signature_handler), + Rmethod, LVP, A3); + + // V0: result handler + + // Stack layout: + // ... + // 10 stack arg0 <--- old sp + // 9 float/double identifiers + // 8 register arg7 + // ... + // 2 register arg1 + // 1 aligned slot + // SP: 0 return address + + // Do FP first so we can use T3 as temp + __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers + + // A0 is for env. + // If the method is not static, A1 will be corrected in generate_native_entry. + for ( int i = 1; i < Argument::n_register_parameters; i++ ) { + Register reg = as_Register(i + A0->encoding()); + FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); + Label isfloatordouble, isdouble, next; + + __ andi(AT, T3, 1 << (i*2)); // Float or Double? + __ bne(AT, R0, isfloatordouble); + __ delayed()->nop(); + + // Do Int register here + __ ld(reg, SP, (1 + i) * wordSize); + __ b (next); + __ delayed()->nop(); + + __ bind(isfloatordouble); + __ andi(AT, T3, 1 << ((i*2)+1)); // Double? + __ bne(AT, R0, isdouble); + __ delayed()->nop(); + + // Do Float Here + __ lwc1(floatreg, SP, (1 + i) * wordSize); + __ b(next); + __ delayed()->nop(); + + // Do Double here + __ bind(isdouble); + __ ldc1(floatreg, SP, (1 + i) * wordSize); + + __ bind(next); + } + + __ ld(RA, SP, 0); + __ daddiu(SP, SP, 10 * wordSize); + __ jr(RA); + __ delayed()->nop(); + return entry; +} + + +// +// Various method entries +// + +address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + + // Rmethod: methodOop + // V0: scratch + // Rsender: sender's sp + + if (!InlineIntrinsics) return NULL; // Generate a vanilla entry + + address entry_point = __ pc(); + + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used.
+ + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: [ lo(arg) ] <-- sp + // [ hi(arg) ] + { + // Note: For JDK 1.3 StrictMath exists and Math.sin/cos/sqrt are + // java methods. Interpreter::method_kind(...) will select + // this entry point for the corresponding methods in JDK 1.3. + __ ldc1(F12, SP, 0 * wordSize); + __ ldc1(F13, SP, 1 * wordSize); + __ push2(RA, FP); + __ daddiu(FP, SP, 2 * wordSize); + + // [ fp ] <-- sp + // [ ra ] + // [ lo ] <-- fp + // [ hi ] + //FIXME, need consider this + switch (kind) { + case Interpreter::java_lang_math_sin : + __ trigfunc('s'); + break; + case Interpreter::java_lang_math_cos : + __ trigfunc('c'); + break; + case Interpreter::java_lang_math_tan : + __ trigfunc('t'); + break; + case Interpreter::java_lang_math_sqrt: + __ sqrt_d(F0, F12); + break; + case Interpreter::java_lang_math_abs: + __ abs_d(F0, F12); + break; + case Interpreter::java_lang_math_log: + // Store to stack to convert 80bit precision back to 64bits + break; + case Interpreter::java_lang_math_log10: + // Store to stack to convert 80bit precision back to 64bits + break; + case Interpreter::java_lang_math_pow: + break; + case Interpreter::java_lang_math_exp: + break; + + default : + ShouldNotReachHere(); + } + + // must maintain return value in F0:F1 + __ ld(RA, FP, (-1) * wordSize); + //FIXME + __ ld(FP, FP, (-2) * wordSize); + __ move(SP, Rsender); + __ jr(RA); + __ delayed()->nop(); + } + return entry_point; +} + + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +address InterpreterGenerator::generate_abstract_entry(void) { + + // Rmethod: methodOop + // V0: receiver (unused) + // Rsender : sender 's sp + address entry_point = __ pc(); + + // abstract method entry + // throw exception + // adjust stack to what a normal return would do + __ empty_expression_stack(); + __ restore_bcp(); + __ restore_locals(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + return entry_point; +} + + +// Empty method, generate a very fast return. + +address InterpreterGenerator::generate_empty_entry(void) { + + // Rmethod: methodOop + // V0: receiver (unused) + // Rsender: sender 's sp , must set sp to this value on return , on mips ,now use T0,as it right? + if (!UseFastEmptyMethods) return NULL; + + address entry_point = __ pc(); + + Label slow_path; + __ li(RT0, SafepointSynchronize::address_of_state()); + __ lw(AT, RT0, 0); + __ move(RT0, (SafepointSynchronize::_not_synchronized)); + __ bne(AT, RT0,slow_path); + __ delayed()->nop(); + __ move(SP, Rsender); + __ jr(RA); + __ delayed()->nop(); + __ bind(slow_path); + (void) generate_normal_entry(false); + + return entry_point; + +} + +void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { + + // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in + // the days we had adapter frames. When we deoptimize a situation where a + // compiled caller calls a compiled caller will have registers it expects + // to survive the call to the callee. If we deoptimize the callee the only + // way we can restore these registers is to have the oldest interpreter + // frame that we create restore these values. 
That is what this routine + // will accomplish. + + // At the moment we have modified c2 to not have any callee save registers + // so this problem does not exist and this routine is just a place holder. + + assert(f->is_interpreted_frame(), "must be interpreted"); +} diff --git a/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp b/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp new file mode 100644 index 00000000000..dccdf6a019c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/javaFrameAnchor_mips.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + // fence? 
+ _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + if (_last_Java_sp != src->_last_Java_sp) + _last_Java_sp = NULL; + + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + // Always walkable + bool walkable(void) { return true; } + // Never any thing to do since we are always walkable and can find address of return addresses + void make_walkable(JavaThread* thread) { } + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + +private: + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + // Assert (last_Java_sp == NULL || fp == NULL) + void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } + +#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp b/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp new file mode 100644 index 00000000000..0f7dd9424aa --- /dev/null +++ b/hotspot/src/cpu/mips/vm/jniFastGetField_mips_64.cpp @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#define BUFFER_SIZE 30*wordSize + +// Instead of issuing lfence for LoadLoad barrier, we create data dependency +// between loads, which is more efficient than lfence. 
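The stubs that follow sample the safepoint counter, bail out to the slow path when the low bit is set, perform the field load, then re-read the counter and bail out again if it moved. A minimal portable model of that protocol is sketched below; it uses std::atomic acquire loads where the generated code relies on the load-to-load data dependency mentioned above, and fast_get_int/safepoint_counter are illustrative names, not HotSpot API.

  #include <atomic>
  #include <cstdint>
  #include <cstdio>

  // Minimal model (not HotSpot code) of the fast-path protocol used by the
  // generated jni_fast_GetXxxField stubs: sample the counter, give up if a
  // safepoint is in progress (low bit set), do the speculative field load,
  // then re-check that the counter has not moved.
  static std::atomic<uint32_t> safepoint_counter{0};

  static bool fast_get_int(const int32_t* field_addr, int32_t* out) {
    uint32_t before = safepoint_counter.load(std::memory_order_acquire);
    if (before & 1) return false;            // safepoint in progress -> slow path
    int32_t v = *field_addr;                 // speculative load of the field
    uint32_t after = safepoint_counter.load(std::memory_order_acquire);
    if (before != after) return false;       // a safepoint intervened -> slow path
    *out = v;
    return true;
  }

  int main() {
    int32_t field = 42, v = 0;
    if (fast_get_int(&field, &v)) std::printf("fast path: %d\n", v);
    else                          std::printf("slow path\n");
    return 0;
  }

Whenever the model returns false, the caller falls back to the ordinary jni_GetXxxField path, mirroring the slow-case jump in the generated stub.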
+ +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + const char *name = NULL; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label slow; + + // return pc RA + // jni env A0 + // obj A1 + // jfieldID A2 + + address counter_addr = SafepointSynchronize::safepoint_counter_addr(); + __ set64(AT, (long)counter_addr); + __ lw(T1, AT, 0); + + // Parameters(A0~A3) should not be modified, since they will be used in slow path + __ andi(AT, T1, 1); + __ bne(AT, R0, slow); + __ delayed()->nop(); + + __ move(T0, A1); + __ clear_jweak_tag(T0); + + __ ld(T0, T0, 0); // unbox, *obj + __ dsrl(T2, A2, 2); // offset + __ daddu(T0, T0, T2); + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); + switch (type) { + case T_BOOLEAN: __ lbu (V0, T0, 0); break; + case T_BYTE: __ lb (V0, T0, 0); break; + case T_CHAR: __ lhu (V0, T0, 0); break; + case T_SHORT: __ lh (V0, T0, 0); break; + case T_INT: __ lw (V0, T0, 0); break; + case T_LONG: __ ld (V0, T0, 0); break; + case T_FLOAT: __ lwc1(F0, T0, 0); break; + case T_DOUBLE: __ ldc1(F0, T0, 0); break; + default: ShouldNotReachHere(); + } + + __ set64(AT, (long)counter_addr); + __ lw(AT, AT, 0); + __ bne(T1, AT, slow); + __ delayed()->nop(); + + __ jr(RA); + __ delayed()->nop(); + + slowcase_entry_pclist[count++] = __ pc(); + __ bind (slow); + address slow_case_addr = NULL; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + } + __ jmp(slow_case_addr); + __ delayed()->nop(); + + __ flush (); + + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_int_field0(T_FLOAT); +} + +address 
JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_int_field0(T_DOUBLE); +} diff --git a/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp b/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp new file mode 100644 index 00000000000..dfcd47b478b --- /dev/null +++ b/hotspot/src/cpu/mips/vm/jniTypes_mips.hpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP +#define CPU_MIPS_VM_JNITYPES_MIPS_HPP + +#include "memory/allocation.hpp" +#include "oops/oop.hpp" +#include "prims/jni.h" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +private: + + // 32bit Helper routines. + static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; + *(jint *)(to ) = from[0]; } + static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } + +public: + // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] + // is 8 bytes. + // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. + // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. + // This error occurs in ReflectInvoke.java + // The parameter of DD(int) should be 4 instead of 0x550000004. + // + // See: [runtime/javaCalls.hpp] + + static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. 
But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + *(jlong*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + *(jlong*) (to + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + *(jlong*) (to + pos) = *from; + pos += 2; + } + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 0 + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to). + // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), + // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. + static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + *(jdouble*) (to) = from; + } + + // A long parameter occupies two slot. + // It must fit the layout rule in methodHandle. + // + // See: [runtime/reflection.cpp] Reflection::invoke() + // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + *(jdouble*) (to + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + *(jdouble*) (to + pos) = *from; + pos += 2; + } + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/jni_mips.h b/hotspot/src/cpu/mips/vm/jni_mips.h new file mode 100644 index 00000000000..6714f51d5d6 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/jni_mips.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +#ifndef _JAVASOFT_JNI_MD_H_ +#define _JAVASOFT_JNI_MD_H_ + +// Note: please do not change these without also changing jni_md.h in the JDK +// repository +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) + #define JNIEXPORT __attribute__((visibility("default"))) + #define JNIIMPORT __attribute__((visibility("default"))) +#else + #define JNIEXPORT + #define JNIIMPORT +#endif + +#define JNICALL + +typedef int jint; + +typedef long jlong; + +typedef signed char jbyte; + +#endif diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp new file mode 100644 index 00000000000..2b8840ae100 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.cpp @@ -0,0 +1,4332 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/macros.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Implementation of MacroAssembler + +intptr_t MacroAssembler::i[32] = {0}; +float MacroAssembler::f[32] = {0.0}; + +void MacroAssembler::print(outputStream *s) { + unsigned int k; + for(k=0; kprint_cr("i%d = 0x%.16lx", k, i[k]); + } + s->cr(); + + for(k=0; kprint_cr("f%d = %f", k, f[k]); + } + s->cr(); +} + +int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; } +int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; } + +void MacroAssembler::save_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ sw (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ swc1 (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + +void MacroAssembler::restore_registers(MacroAssembler *masm) { +#define __ masm-> + for(int k=0; k<32; k++) { + __ lw (as_Register(k), A0, i_offset(k)); + } + + for(int k=0; k<32; k++) { + __ lwc1 (as_FloatRegister(k), A0, f_offset(k)); + } +#undef __ +} + + +void MacroAssembler::pd_patch_instruction(address branch, address target) { + jint& stub_inst = *(jint*) branch; + jint *pc = (jint *)branch; + + if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) { + //b_far: + // move(AT, RA); // daddu + // emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); + // nop(); + // lui(T9, 0); // to be patched + // ori(T9, 0); + // daddu(T9, T9, RA); + // move(RA, AT); + // jr(T9); + + assert(opcode(pc[3]) == lui_op + && opcode(pc[4]) == ori_op + && special(pc[5]) == daddu_op, "Not a branch label patch"); + if(!(opcode(pc[3]) == lui_op + && opcode(pc[4]) == ori_op + && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); } + + int offset = target - branch; + if (!is_simm16(offset)) { + pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12); + pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12); + } else { + // revert to "beq + nop" + CodeBuffer cb(branch, 4 * 10); + MacroAssembler masm(&cb); +#define __ masm. 
+ __ b(target); + __ delayed()->nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + __ nop(); + } + return; + } else if (special(pc[4]) == jr_op + && opcode(pc[4]) == special_op + && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { + //jmp_far: + // patchable_set48(T9, target); + // jr(T9); + // nop(); + + CodeBuffer cb(branch, 4 * 4); + MacroAssembler masm(&cb); + masm.patchable_set48(T9, (long)(target)); + return; + } + +#ifndef PRODUCT + if (!is_simm16((target - branch - 4) >> 2)) { + tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); + tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); + Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); + tty->print_cr("======= End of decoding ======="); + } +#endif + + stub_inst = patched_branch(target - branch, stub_inst, 0); +} + +static inline address first_cache_address() { + return CodeCache::low_bound() + sizeof(HeapBlock::Header); +} + +static inline address last_cache_address() { + return CodeCache::high_bound() - Assembler::InstructionSize; +} + +int MacroAssembler::call_size(address target, bool far, bool patchable) { + if (patchable) return 6 << Assembler::LogInstructionSize; + if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop + return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; +} + +// Can we reach target using jal/j from anywhere +// in the code cache (because code can be relocated)? +bool MacroAssembler::reachable_from_cache(address target) { + address cl = first_cache_address(); + address ch = last_cache_address(); + + return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); +} + +bool MacroAssembler::reachable_from_cache() { + if (ForceUnreachable) { + return false; + } else { + address cl = first_cache_address(); + address ch = last_cache_address(); + + return fit_in_jal(cl, ch); + } +} + +void MacroAssembler::general_jump(address target) { + if (reachable_from_cache(target)) { + j(target); + delayed()->nop(); + } else { + set64(T9, (long)target); + jr(T9); + delayed()->nop(); + } +} + +int MacroAssembler::insts_for_general_jump(address target) { + if (reachable_from_cache(target)) { + //j(target); + //nop(); + return 2; + } else { + //set64(T9, (long)target); + //jr(T9); + //nop(); + return insts_for_set64((jlong)target) + 2; + } +} + +void MacroAssembler::patchable_jump(address target) { + if (reachable_from_cache(target)) { + nop(); + nop(); + nop(); + nop(); + j(target); + delayed()->nop(); + } else { + patchable_set48(T9, (long)target); + jr(T9); + delayed()->nop(); + } +} + +int MacroAssembler::insts_for_patchable_jump(address target) { + return 6; +} + +void MacroAssembler::general_call(address target) { + if (reachable_from_cache(target)) { + jal(target); + delayed()->nop(); + } else { + set64(T9, (long)target); + jalr(T9); + delayed()->nop(); + } +} + +int MacroAssembler::insts_for_general_call(address target) { + if (reachable_from_cache(target)) { + //jal(target); + //nop(); + return 2; + } else { + //set64(T9, (long)target); + //jalr(T9); + //nop(); + return insts_for_set64((jlong)target) + 2; + } +} + +void MacroAssembler::patchable_call(address target) { + if (reachable_from_cache(target)) { + nop(); + nop(); + nop(); + nop(); + jal(target); + delayed()->nop(); + } else { + patchable_set48(T9, (long)target); + jalr(T9); + delayed()->nop(); + } +} + +int 
MacroAssembler::insts_for_patchable_call(address target) { + return 6; +} + +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. + +address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { + assert(JavaThread::current()->is_Compiler_thread(), "just checking"); + assert(entry.rspec().type() == relocInfo::runtime_call_type + || entry.rspec().type() == relocInfo::opt_virtual_call_type + || entry.rspec().type() == relocInfo::static_call_type + || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + + address target = entry.target(); + if (!reachable_from_cache()) { + address stub = emit_trampoline_stub(offset(), target); + if (stub == NULL) { + return NULL; // CodeCache is full + } + } + + if (cbuf) cbuf->set_insts_mark(); + relocate(entry.rspec()); + + if (reachable_from_cache()) { + nop(); + nop(); + nop(); + nop(); + jal(target); + delayed()->nop(); + } else { + // load the call target from the trampoline stub + // branch + long dest = (long)pc(); + dest += (dest & 0x8000) << 1; + lui(T9, dest >> 32); + ori(T9, T9, split_low(dest >> 16)); + dsll(T9, T9, 16); + ld(T9, T9, simm16(split_low(dest))); + jalr(T9); + delayed()->nop(); + } + return pc(); +} + +// Emit a trampoline stub for a call to a target which is too far away. +address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { + // Max stub size: alignment nop, TrampolineStub. + address stub = start_a_stub(NativeInstruction::nop_instruction_size + + NativeCallTrampolineStub::instruction_size); + if (stub == NULL) { + return NULL; // CodeBuffer::expand failed + } + + // Create a trampoline stub relocation which relates this trampoline stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section. 
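The "dest += (dest & 0x8000) << 1" adjustment in trampoline_call above exists because the final 16 bits are applied as a sign-extended load offset; pre-adding 0x10000 whenever bit 15 is set cancels that sign extension. Below is a self-contained check of the arithmetic, assuming nothing beyond the lui/ori/dsll/ld split shown above (rebuild is an invented name).

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // Self-contained check (not HotSpot code) of the address split used by the
  // lui/ori/dsll/ld sequence above.  The low 16 bits end up as a sign-extended
  // load offset, so the value is pre-adjusted by 0x10000 whenever bit 15 is
  // set, exactly what "dest += (dest & 0x8000) << 1" does.
  static uint64_t rebuild(uint64_t dest) {
    uint64_t adj   = dest + ((dest & 0x8000u) << 1);   // carry fix-up
    uint64_t hi32  = adj >> 32;                        // lui part
    uint64_t mid16 = (adj >> 16) & 0xffff;             // ori part
    int64_t  lo16  = (int16_t)(dest & 0xffff);         // sign-extended ld offset
    uint64_t base  = ((hi32 << 16) | mid16) << 16;     // lui; ori; dsll 16
    return base + (uint64_t)lo16;                      // the ld offset addition
  }

  int main() {
    const uint64_t samples[] = { 0x000055651ed514ull, 0x00007fff8001ull,
                                 0x0000ffff8000ull,   0x123456789abcull };
    for (uint64_t d : samples) {
      assert(rebuild(d) == d);
      std::printf("0x%012llx reassembles correctly\n", (unsigned long long)d);
    }
    return 0;
  }

The third sample deliberately has bit 15 set with the next 16 bits all ones, so the fix-up carries all the way into the lui part; the assert confirms the round trip still holds.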
+ align(wordSize); + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); + emit_int64((int64_t)dest); + end_a_stub(); + return stub; +} + +void MacroAssembler::beq_far(Register rs, Register rt, address entry) { + u_char * cur_pc = pc(); + + // Near/Far jump + if(is_simm16((entry - pc() - 4) / 4)) { + Assembler::beq(rs, rt, offset(entry)); + } else { + Label not_jump; + bne(rs, rt, not_jump); + delayed()->nop(); + + b_far(entry); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + beq_far(rs, rt, target(L)); + } else { + u_char * cur_pc = pc(); + Label not_jump; + bne(rs, rt, not_jump); + delayed()->nop(); + + b_far(L); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, address entry) { + u_char * cur_pc = pc(); + + //Near/Far jump + if(is_simm16((entry - pc() - 4) / 4)) { + Assembler::bne(rs, rt, offset(entry)); + } else { + Label not_jump; + beq(rs, rt, not_jump); + delayed()->nop(); + + b_far(entry); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { + if (L.is_bound()) { + bne_far(rs, rt, target(L)); + } else { + u_char * cur_pc = pc(); + Label not_jump; + beq(rs, rt, not_jump); + delayed()->nop(); + + b_far(L); + delayed()->nop(); + + bind(not_jump); + has_delay_slot(); + } +} + +void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { + Label not_taken; + + bne(rs, rt, not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { + Label not_taken; + + beq(rs, rt, not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::bc1t_long(Label& L) { + Label not_taken; + + bc1f(not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::bc1f_long(Label& L) { + Label not_taken; + + bc1t(not_taken); + delayed()->nop(); + + jmp_far(L); + + bind(not_taken); +} + +void MacroAssembler::b_far(Label& L) { + if (L.is_bound()) { + b_far(target(L)); + } else { + volatile address dest = target(L); +// +// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 +// 0x00000055651ed514: daddu at, ra, zero +// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 +// +// 0x00000055651ed51c: sll zero, zero, 0 +// 0x00000055651ed520: lui t9, 0x0 +// 0x00000055651ed524: ori t9, t9, 0x21b8 +// 0x00000055651ed528: daddu t9, t9, ra +// 0x00000055651ed52c: daddu ra, at, zero +// 0x00000055651ed530: jr t9 +// 0x00000055651ed534: sll zero, zero, 0 +// + move(AT, RA); + emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); + nop(); + lui(T9, 0); // to be patched + ori(T9, T9, 0); + daddu(T9, T9, RA); + move(RA, AT); + jr(T9); + } +} + +void MacroAssembler::b_far(address entry) { + u_char * cur_pc = pc(); + + // Near/Far jump + if(is_simm16((entry - pc() - 4) / 4)) { + b(offset(entry)); + } else { + // address must be bounded + move(AT, RA); + emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); + nop(); + li32(T9, entry - pc()); + daddu(T9, T9, RA); + move(RA, AT); + jr(T9); + } +} + +void MacroAssembler::ld_ptr(Register rt, Register base, Register offset) { + addu_long(AT, base, offset); + ld_ptr(rt, AT, 0); +} + +void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { + guarantee(AT != rt, "AT must not 
equal rt"); + addu_long(AT, base, offset); + st_ptr(rt, AT, 0); +} + +Address MacroAssembler::as_Address(AddressLiteral adr) { + return Address(adr.target(), adr.rspec()); +} + +Address MacroAssembler::as_Address(ArrayAddress adr) { + return Address::make_array(adr); +} + +// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). +void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { + Label again; + + li(tmp_reg1, counter_addr); + bind(again); + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); + ll(tmp_reg2, tmp_reg1, 0); + addiu(tmp_reg2, tmp_reg2, inc); + sc(tmp_reg2, tmp_reg1, 0); + beq(tmp_reg2, R0, again); + delayed()->nop(); +} + +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + bool need_tmp_reg = false; + if (tmp_reg == noreg) { + need_tmp_reg = true; + tmp_reg = T9; + } + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + ld_ptr(swap_reg, mark_addr); + } + + if (need_tmp_reg) { + push(tmp_reg); + } + move(tmp_reg, swap_reg); + andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); + daddiu(AT, R0, markOopDesc::biased_lock_pattern); + dsubu(AT, AT, tmp_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + + bne(AT, R0, cas_label); + delayed()->nop(); + + + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + // Note that because there is no current thread register on MIPS we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and + // simpler. + st_ptr(swap_reg, saved_mark_addr); + if (need_tmp_reg) { + push(tmp_reg); + } + if (swap_reg_contains_mark) { + null_check_offset = offset(); + } + load_prototype_header(tmp_reg, obj_reg); + xorr(tmp_reg, tmp_reg, swap_reg); + get_thread(swap_reg); + xorr(swap_reg, swap_reg, tmp_reg); + + move(AT, ~((int) markOopDesc::age_mask_in_place)); + andr(swap_reg, swap_reg, AT); + + if (PrintBiasedLockingStatistics) { + Label L; + bne(swap_reg, R0, L); + delayed()->nop(); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (need_tmp_reg) { + pop(tmp_reg); + } + beq(swap_reg, R0, done); + delayed()->nop(); + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. 
We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + + move(AT, markOopDesc::biased_lock_mask_in_place); + andr(AT, swap_reg, AT); + bne(AT, R0, try_revoke_bias); + delayed()->nop(); + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + + move(AT, markOopDesc::epoch_mask_in_place); + andr(AT,swap_reg, AT); + bne(AT, R0, try_rebias); + delayed()->nop(); + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + + ld_ptr(swap_reg, saved_mark_addr); + + move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + andr(swap_reg, swap_reg, AT); + + if (need_tmp_reg) { + push(tmp_reg); + } + get_thread(tmp_reg); + orr(tmp_reg, tmp_reg, swap_reg); + //if (os::is_MP()) { + // sync(); + //} + cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + delayed()->nop(); + push(tmp_reg); + push(A0); + atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); + pop(A0); + pop(tmp_reg); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + delayed()->nop(); + } + b(done); + delayed()->nop(); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. 
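The guard at the top of biased_locking_enter above reduces to a single mask-and-compare on the header word. The sketch below models only that test; the concrete constants (three low bits, pattern 0b101) follow the usual markOop layout but are assumptions of this example, not values taken from this patch.

  #include <cstdint>
  #include <cstdio>

  // Illustrative only (not HotSpot code).  The constants below follow the
  // usual markOop layout (2 lock bits plus 1 "biased" bit, pattern 0b101);
  // they are assumptions of this sketch.
  constexpr uint64_t kBiasedLockMask    = 0b111;
  constexpr uint64_t kBiasedLockPattern = 0b101;

  static bool has_bias_pattern(uint64_t mark) {
    return (mark & kBiasedLockMask) == kBiasedLockPattern;
  }

  int main() {
    const uint64_t thread_bits  = 0x7f0000001000ull;   // pretend thread pointer
    const uint64_t biased_mark  = thread_bits | kBiasedLockPattern;
    const uint64_t neutral_mark = 0x1;                 // unlocked, unbiased header
    std::printf("biased header:  %s\n", has_bias_pattern(biased_mark)  ? "bias path" : "CAS path");
    std::printf("neutral header: %s\n", has_bias_pattern(neutral_mark) ? "bias path" : "CAS path");
    return 0;
  }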
+ if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + get_thread(swap_reg); + orr(tmp_reg, tmp_reg, swap_reg); + ld_ptr(swap_reg, saved_mark_addr); + + //if (os::is_MP()) { + // sync(); + //} + cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + delayed()->nop(); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + if (slow_case != NULL) { + beq_far(AT, R0, *slow_case); + delayed()->nop(); + } + + b(done); + delayed()->nop(); + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + ld_ptr(swap_reg, saved_mark_addr); + + if (need_tmp_reg) { + push(tmp_reg); + } + load_prototype_header(tmp_reg, obj_reg); + //if (os::is_MP()) { + // lock(); + //} + cmpxchg(tmp_reg, Address(obj_reg, 0), swap_reg); + if (need_tmp_reg) { + pop(tmp_reg); + } + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (PrintBiasedLockingStatistics) { + Label L; + bne(AT, R0, L); + delayed()->nop(); + push(AT); + push(tmp_reg); + atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); + pop(tmp_reg); + pop(AT); + bind(L); + } + + bind(cas_label); + return null_check_offset; +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); + daddiu(AT, R0, markOopDesc::biased_lock_pattern); + + beq(AT, temp_reg, done); + delayed()->nop(); +} + +// the stack pointer adjustment is needed. 
see InterpreterMacroAssembler::super_call_VM_leaf +// this method will handle the stack problem, you need not to preserve the stack space for the argument now +void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { + Label L, E; + + assert(number_of_arguments <= 4, "just check"); + + andi(AT, SP, 0xf); + beq(AT, R0, L); + delayed()->nop(); + daddiu(SP, SP, -8); + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + daddiu(SP, SP, 8); + b(E); + delayed()->nop(); + + bind(L); + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + bind(E); +} + + +void MacroAssembler::jmp(address entry) { + patchable_set48(T9, (long)entry); + jr(T9); +} + +void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::runtime_call_type: + case relocInfo::none: + jmp(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + patchable_set48(T9, (long)entry); + jr(T9); + } + break; + } +} + +void MacroAssembler::jmp_far(Label& L) { + if (L.is_bound()) { + address entry = target(L); + assert(entry != NULL, "jmp most probably wrong"); + InstructionMark im(this); + + relocate(relocInfo::internal_word_type); + patchable_set48(T9, (long)entry); + } else { + InstructionMark im(this); + L.add_patch_at(code(), locator()); + + relocate(relocInfo::internal_word_type); + patchable_set48(T9, (long)pc()); + } + + jr(T9); + delayed()->nop(); +} +void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_set48(AT, (long)obj); + sd(AT, dst); +} + +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj) { + oop_index = oop_recorder()->find_index(obj); + } else { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } + relocate(metadata_Relocation::spec(oop_index)); + patchable_set48(dst, (long)obj); +} + +void MacroAssembler::call(address entry) { +// c/c++ code assume T9 is entry point, so we just always move entry to t9 +// maybe there is some more graceful method to handle this. FIXME +// For more info, see class NativeCall. 
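call_VM_leaf_base above keeps the native calling convention happy by checking SP & 0xf and dropping an extra 8 bytes before the call when the stack is only 8-byte aligned. The helper below is an illustrative restatement of that adjustment (pre_call_adjust is an invented name), assuming the 16-byte alignment requirement at calls that the 0xf mask implies.

  #include <cstdint>
  #include <cstdio>

  // Illustrative restatement (not HotSpot code) of the andi/beq/daddiu
  // sequence at the top of call_VM_leaf_base: when SP is only 8-byte aligned,
  // subtract 8 before the call and add it back afterwards.
  static uintptr_t pre_call_adjust(uintptr_t sp) {
    return (sp & 0xf) ? 8u : 0u;
  }

  int main() {
    const uintptr_t samples[] = { 0x7ffffff0u, 0x7ffffff8u };
    for (uintptr_t sp : samples) {
      uintptr_t adj = pre_call_adjust(sp);
      std::printf("sp=0x%llx -> subtract %llu, call with sp=0x%llx\n",
                  (unsigned long long)sp, (unsigned long long)adj,
                  (unsigned long long)(sp - adj));
    }
    return 0;
  }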
+ patchable_set48(T9, (long)entry); + jalr(T9); +} + +void MacroAssembler::call(address entry, relocInfo::relocType rtype) { + switch (rtype) { + case relocInfo::runtime_call_type: + case relocInfo::none: + call(entry); + break; + default: + { + InstructionMark im(this); + relocate(rtype); + call(entry); + } + break; + } +} + +void MacroAssembler::call(address entry, RelocationHolder& rh) +{ + switch (rh.type()) { + case relocInfo::runtime_call_type: + case relocInfo::none: + call(entry); + break; + default: + { + InstructionMark im(this); + relocate(rh); + call(entry); + } + break; + } +} + +void MacroAssembler::ic_call(address entry) { + RelocationHolder rh = virtual_call_Relocation::spec(pc()); + patchable_set48(IC_Klass, (long)Universe::non_oop_word()); + assert(entry != NULL, "call most probably wrong"); + InstructionMark im(this); + trampoline_call(AddressLiteral(entry, rh)); +} + +void MacroAssembler::c2bool(Register r) { + Label L; + Assembler::beq(r, R0, L); + delayed()->nop(); + move(r, 1); + bind(L); +} + +#ifndef PRODUCT +extern "C" void findpc(intptr_t x); +#endif + +void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { + if ( ShowMessageBoxOnError ) { + JavaThreadState saved_state = JavaThread::current()->thread_state(); + JavaThread::current()->set_thread_state(_thread_in_vm); + { + // In order to get locks work, we need to fake a in_VM state + ttyLocker ttyl; + ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } + + } + ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); + } + else + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); +} + + +void MacroAssembler::stop(const char* msg) { + li(A0, (long)msg); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + brk(17); +} + +void MacroAssembler::warn(const char* msg) { + pushad(); + li(A0, (long)msg); + push(S2); + move(AT, -(StackAlignmentInBytes)); + move(S2, SP); // use S2 as a sender SP holder + andr(SP, SP, AT); // align stack as required by ABI + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + move(SP, S2); // use S2 as a sender SP holder + pop(S2); + popad(); +} + +void MacroAssembler::increment(Register reg, int imm) { + if (!imm) return; + if (is_simm16(imm)) { + daddiu(reg, reg, imm); + } else { + move(AT, imm); + daddu(reg, reg, AT); + } +} + +void MacroAssembler::decrement(Register reg, int imm) { + increment(reg, -imm); +} + + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) move(A2, arg_2); + assert(arg_2 != A1, "smashed argument"); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1!=A1) move(A1, arg_1); + if (arg_2!=A2) 
move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + if (arg_1 != A1) move(A1, arg_1); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); + if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + + address before_call_pc; + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + + assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); + + // set last Java frame before call + before_call_pc = (address)pc(); + set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); + + // do the call + move(A0, java_thread); + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. 
+#ifndef OPT_THREAD + get_thread(java_thread); +#else +#ifdef ASSERT + { + Label L; + get_thread(AT); + beq(java_thread, AT, L); + delayed()->nop(); + stop("MacroAssembler::call_VM_base: TREG not callee saved?"); + bind(L); + } +#endif +#endif + + // discard thread and arguments + ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // reset last Java frame + reset_last_Java_frame(java_thread, false); + + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + Label L; + ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + beq(AT, R0, L); + delayed()->nop(); + li(AT, before_call_pc); + push(AT); + jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + delayed()->nop(); + bind(L); + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); + sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); + verify_oop(oop_result); + } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + + move(V0, SP); + //we also reserve space for java_thread here + move(AT, -(StackAlignmentInBytes)); + andr(SP, SP, AT); + call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); + +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + if (arg_0 != A0) move(A0, arg_0); + call_VM_leaf(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + call_VM_leaf(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + if (arg_0 != A0) move(A0, arg_0); + if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); + if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); + call_VM_leaf(entry_point, 3); +} +void MacroAssembler::super_call_VM_leaf(address entry_point) { + MacroAssembler::call_VM_leaf_base(entry_point, 0); +} + + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1) { + if (arg_1 != A0) move(A0, arg_1); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + + +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} +void MacroAssembler::super_call_VM_leaf(address entry_point, + Register arg_1, + Register arg_2, + Register arg_3) { + if (arg_1 != A0) move(A0, arg_1); + if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); + if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { +} + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { +} + +void MacroAssembler::null_check(Register reg, int offset) { + if 
(needs_explicit_null_check(offset)) { + // provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any (non-CC) registers + // NOTE: cmpl is plenty here to provoke a segv + lw(AT, reg, 0); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +void MacroAssembler::enter() { + push2(RA, FP); + move(FP, SP); +} + +void MacroAssembler::leave() { + move(SP, FP); + pop2(RA, FP); +} + +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T1; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // we must set sp to zero to clear frame + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is possible + // that we need it only for debugging + if(clear_fp) { + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); +} + +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + // we must set sp to zero to clear frame + sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); +} + +// Write serialization page so VM thread can do a pseudo remote membar. +// We use the current thread pointer to calculate a thread specific +// offset to write to within the page. This minimizes bus traffic +// due to cache line collision. +void MacroAssembler::serialize_memory(Register thread, Register tmp) { + int mask = os::vm_page_size() - sizeof(int); + assert_different_registers(AT, tmp); + assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); + srl(AT, thread, os::get_serialize_page_shift_count()); + andi(AT, AT, mask); + li(tmp, os::get_memory_serialize_page()); + addu(tmp, tmp, AT); + sw(R0, tmp, 0); +} + +// Calls to C land +// +// When entering C land, the fp, & sp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. 
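+//
+// The usual bracketing, as in call_VM_base() above, is roughly:
+//
+//   set_last_Java_frame(thread, last_java_sp, FP, pc);  // publish the anchor
+//   call(entry_point, relocInfo::runtime_call_type);    // enter C land
+//   ...
+//   reset_last_Java_frame(thread, false);               // clear sp and pc
+//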
+void MacroAssembler::set_last_Java_frame(Register java_thread, + Register last_java_sp, + Register last_java_fp, + address last_java_pc) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifndef OPT_THREAD + java_thread = T2; + get_thread(java_thread); +#else + java_thread = TREG; +#endif + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); + } + + // last_java_pc is optional + if (last_java_pc != NULL) { + relocate(relocInfo::internal_word_type); + patchable_set48(AT, (long)last_java_pc); + st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + } + st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc) { + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + + Register thread = TREG; +#ifndef OPT_THREAD + get_thread(thread); +#endif + // last_java_fp is optional + if (last_java_fp->is_valid()) { + sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); + } + + // last_java_pc is optional + if (last_java_pc != NULL) { + relocate(relocInfo::internal_word_type); + patchable_set48(AT, (long)last_java_pc); + st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + } + + sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); +} + +////////////////////////////////////////////////////////////////////////////////// +#if INCLUDE_ALL_GCS + +void MacroAssembler::g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + + assert(thread == TREG, "must be"); + + Label done; + Label runtime; + + assert(pre_val != noreg, "check this code"); + + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + assert(pre_val != V0, "check this code"); + } + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + // Is marking active? + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + lw(AT, in_progress); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + lb(AT, in_progress); + } + beq(AT, R0, done); + delayed()->nop(); + + // Do we need to load the previous value? + if (obj != noreg) { + load_heap_oop(pre_val, Address(obj, 0)); + } + + // Is the previous value null? + beq(pre_val, R0, done); + delayed()->nop(); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) 
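+ //
+ // Roughly, the fast path below is:
+ //   if (index == 0) goto runtime;        // SATB buffer full (or not allocated)
+ //   index -= wordSize;
+ //   *(buf + index) = pre_val;            // append the previous value
+ //   goto done;
+ //   runtime: SharedRuntime::g1_wb_pre(pre_val, thread);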
+ + ld(tmp, index); + beq(tmp, R0, runtime); + delayed()->nop(); + + daddiu(tmp, tmp, -1 * wordSize); + sd(tmp, index); + ld(AT, buffer); + daddu(tmp, tmp, AT); + + // Record the previous value + sd(pre_val, tmp, 0); + beq(R0, R0, done); + delayed()->nop(); + + bind(runtime); + // save the live input values + if (tosca_live) push(V0); + + if (obj != noreg && obj != V0) push(obj); + + if (pre_val != V0) push(pre_val); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(fp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then fp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + + if (expand_call) { + assert(pre_val != A1, "smashed arg"); + if (thread != A1) move(A1, thread); + if (pre_val != A0) move(A0, pre_val); + MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); + } else { + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); + } + + // save the live input values + if (pre_val != V0) + pop(pre_val); + + if (obj != noreg && obj != V0) + pop(obj); + + if(tosca_live) pop(V0); + + bind(done); +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + assert(tmp != AT, "must be"); + assert(tmp2 != AT, "must be"); + assert(thread == TREG, "must be"); + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // Does store cross heap regions? + xorr(AT, store_addr, new_val); + dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); + beq(AT, R0, done); + delayed()->nop(); + + + // crosses regions, storing NULL? + beq(new_val, R0, done); + delayed()->nop(); + + // storing region crossing non-NULL, is card already dirty? + const Register card_addr = tmp; + const Register cardtable = tmp2; + + move(card_addr, store_addr); + dsrl(card_addr, card_addr, CardTableModRefBS::card_shift); + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. + set64(cardtable, (intptr_t)ct->byte_map_base); + daddu(card_addr, card_addr, cardtable); + + lb(AT, card_addr, 0); + daddiu(AT, AT, -1 * (int)G1SATBCardTableModRefBS::g1_young_card_val()); + beq(AT, R0, done); + delayed()->nop(); + + sync(); + lb(AT, card_addr, 0); + daddiu(AT, AT, -1 * (int)(int)CardTableModRefBS::dirty_card_val()); + beq(AT, R0, done); + delayed()->nop(); + + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. 
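+ //
+ // The filters above have already dropped same-region stores, NULL stores,
+ // young cards, and cards already dirty (re-read after the sync()). What is
+ // left: mark the card dirty and enqueue card_addr on the thread's dirty
+ // card queue, calling SharedRuntime::g1_wb_post if the queue is full.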
+ move(AT, (int)CardTableModRefBS::dirty_card_val()); + sb(AT, card_addr, 0); + + lw(AT, queue_index); + beq(AT, R0, runtime); + delayed()->nop(); + daddiu(AT, AT, -1 * wordSize); + sw(AT, queue_index); + ld(tmp2, buffer); + ld(AT, queue_index); + daddu(tmp2, tmp2, AT); + sd(card_addr, tmp2, 0); + beq(R0, R0, done); + delayed()->nop(); + + bind(runtime); + // save the live input values + push(store_addr); + push(new_val); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, TREG); + pop(new_val); + pop(store_addr); + + bind(done); +} + +#endif // INCLUDE_ALL_GCS +////////////////////////////////////////////////////////////////////////////////// + + +void MacroAssembler::store_check(Register obj) { + // Does a store check for the oop in register obj. The content of + // register obj is destroyed afterwards. + store_check_part_1(obj); + store_check_part_2(obj); +} + +void MacroAssembler::store_check(Register obj, Address dst) { + store_check(obj); +} + + +// split the store check operation so that other instructions can be scheduled inbetween +void MacroAssembler::store_check_part_1(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + dsrl(obj, obj, CardTableModRefBS::card_shift); +} + +void MacroAssembler::store_check_part_2(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + set64(AT, (long)ct->byte_map_base); + daddu(AT, AT, obj); + if (UseConcMarkSweepGC) sync(); + sb(R0, AT, 0); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. +void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, + Register t1, Register t2, Label& slow_case) { + assert_different_registers(obj, var_size_in_bytes, t1, t2, AT); + + Register end = t2; +#ifndef OPT_THREAD + Register thread = t1; + get_thread(thread); +#else + Register thread = TREG; +#endif + verify_tlab(t1, t2);//blows t1&t2 + + ld_ptr(obj, thread, in_bytes(JavaThread::tlab_top_offset())); + + if (var_size_in_bytes == NOREG) { + set64(AT, con_size_in_bytes); + addu(end, obj, AT); + } else { + addu(end, obj, var_size_in_bytes); + } + + ld_ptr(AT, thread, in_bytes(JavaThread::tlab_end_offset())); + sltu(AT, AT, end); + bne_far(AT, R0, slow_case); + delayed()->nop(); + + + // update the tlab top pointer + st_ptr(end, thread, in_bytes(JavaThread::tlab_top_offset())); + + verify_tlab(t1, t2); +} + +// Defines obj, preserves var_size_in_bytes +void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, + Register t1, Register t2, Label& slow_case) { + assert_different_registers(obj, var_size_in_bytes, t1, AT); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. 
+ b_far(slow_case); + delayed()->nop(); + } else { + + Address heap_top(t1); + li(t1, (long)Universe::heap()->top_addr()); + ld_ptr(obj, heap_top); + + Register end = t2; + Label retry; + + bind(retry); + if (var_size_in_bytes == NOREG) { + set64(AT, con_size_in_bytes); + addu(end, obj, AT); + } else { + addu(end, obj, var_size_in_bytes); + } + // if end < obj then we wrapped around => object too long => slow case + sltu(AT, end, obj); + bne_far(AT, R0, slow_case); + delayed()->nop(); + + li(AT, (long)Universe::heap()->end_addr()); + ld_ptr(AT, AT, 0); + sltu(AT, AT, end); + bne_far(AT, R0, slow_case); + delayed()->nop(); + // Compare obj with the top addr, and if still equal, store the new top addr in + // end at the address of the top addr pointer. Sets ZF if was equal, and clears + // it otherwise. Use lock prefix for atomicity on MPs. + //if (os::is_MP()) { + // sync(); + //} + + // if someone beat us on the allocation, try again, otherwise continue + cmpxchg(end, heap_top, obj); + beq_far(AT, R0, retry); + delayed()->nop(); + } +} + +// C2 doesn't invoke this one. +void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) { + Register top = T0; + Register t1 = T1; + Register t2 = T9; + Register t3 = T3; + Register thread_reg = T8; + assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ T2, A4); + Label do_refill, discard_tlab; + + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. + b(slow_case); + delayed()->nop(); + } + + get_thread(thread_reg); + + ld_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset())); + ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_end_offset())); + + // calculate amount of free space + subu(t1, t1, top); + shr(t1, LogHeapWordSize); + + // Retain tlab and allocate object in shared space if + // the amount free in the tlab is too large to discard. 
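+ //
+ // In outline (matching the code below):
+ //   if (free > tlab_refill_waste_limit) {   // keep this TLAB
+ //     tlab_refill_waste_limit += ThreadLocalAllocBuffer::refill_waste_limit_increment();
+ //     goto try_eden;                        // allocate this object in eden
+ //   } else {                                // discard this TLAB
+ //     fill the unused part with a dummy int[] so the heap stays parseable,
+ //     then grab a fresh TLAB from eden (do_refill).
+ //   }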
+ ld_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); + slt(AT, t2, t1); + beq(AT, R0, discard_tlab); + delayed()->nop(); + + // Retain + li(AT, ThreadLocalAllocBuffer::refill_waste_limit_increment()); + addu(t2, t2, AT); + st_ptr(t2, thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); + + if (TLABStats) { + // increment number of slow_allocations + lw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); + addiu(AT, AT, 1); + sw(AT, thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())); + } + b(try_eden); + delayed()->nop(); + + bind(discard_tlab); + if (TLABStats) { + // increment number of refills + lw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); + addiu(AT, AT, 1); + sw(AT, thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())); + // accumulate wastage -- t1 is amount free in tlab + lw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); + addu(AT, AT, t1); + sw(AT, thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); + } + + // if tlab is currently allocated (top or end != null) then + // fill [top, end + alignment_reserve) with array object + beq(top, R0, do_refill); + delayed()->nop(); + + // set up the mark word + li(AT, (long)markOopDesc::prototype()->copy_set_hash(0x2)); + st_ptr(AT, top, oopDesc::mark_offset_in_bytes()); + + // set the length to the remaining space + addiu(t1, t1, - typeArrayOopDesc::header_size(T_INT)); + addiu(t1, t1, ThreadLocalAllocBuffer::alignment_reserve()); + shl(t1, log2_intptr(HeapWordSize/sizeof(jint))); + sw(t1, top, arrayOopDesc::length_offset_in_bytes()); + + // set klass to intArrayKlass + li(AT, (intptr_t)Universe::intArrayKlassObj_addr()); + ld_ptr(t1, AT, 0); + //st_ptr(t1, top, oopDesc::klass_offset_in_bytes()); + store_klass(top, t1); + + ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_start_offset())); + subu(t1, top, t1); + incr_allocated_bytes(thread_reg, t1, 0); + + // refill the tlab with an eden allocation + bind(do_refill); + ld_ptr(t1, thread_reg, in_bytes(JavaThread::tlab_size_offset())); + shl(t1, LogHeapWordSize); + // add object_size ?? + eden_allocate(top, t1, 0, t2, t3, slow_case); + + // Check that t1 was preserved in eden_allocate. 
+#ifdef ASSERT
+ if (UseTLAB) {
+ Label ok;
+ assert_different_registers(thread_reg, t1);
+ ld_ptr(AT, thread_reg, in_bytes(JavaThread::tlab_size_offset()));
+ shl(AT, LogHeapWordSize);
+ beq(AT, t1, ok);
+ delayed()->nop();
+ stop("assert(t1 != tlab size)");
+ should_not_reach_here();
+
+ bind(ok);
+ }
+#endif
+ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_start_offset()));
+ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_top_offset()));
+ addu(top, top, t1);
+ addiu(top, top, - ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+ st_ptr(top, thread_reg, in_bytes(JavaThread::tlab_end_offset()));
+ verify_tlab(t1, t2);
+ b(retry);
+ delayed()->nop();
+}
+
+void MacroAssembler::incr_allocated_bytes(Register thread,
+ Register var_size_in_bytes,
+ int con_size_in_bytes,
+ Register t1) {
+ if (!thread->is_valid()) {
+#ifndef OPT_THREAD
+ assert(t1->is_valid(), "need temp reg");
+ thread = t1;
+ get_thread(thread);
+#else
+ thread = TREG;
+#endif
+ }
+
+ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
+ if (var_size_in_bytes->is_valid()) {
+ addu(AT, AT, var_size_in_bytes);
+ } else {
+ addiu(AT, AT, con_size_in_bytes);
+ }
+ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset()));
+}
+
+static const double pi_4 = 0.7853981633974483;
+
+// must get the argument (a double) in F12/F13
+//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) {
+// We need to preserve the registers which may be modified during the call
+void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
+ // save all modified registers here
+ // FIXME: the disassembly of trigfunc only uses V0, V1, T9, SP, RA, so we only save V0, V1, T9
+ pushad();
+ // we should reserve stack space before the call
+ addiu(SP, SP, -wordSize * 2);
+ switch (trig){
+ case 's' :
+ call( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type );
+ delayed()->nop();
+ break;
+ case 'c':
+ call( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type );
+ delayed()->nop();
+ break;
+ case 't':
+ call( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type );
+ delayed()->nop();
+ break;
+ default: assert(false, "bad intrinsic");
+ break;
+
+ }
+
+ addiu(SP, SP, wordSize * 2);
+ popad();
+}
+
+void MacroAssembler::li(Register rd, long imm) {
+ if (imm <= max_jint && imm >= min_jint) {
+ li32(rd, (int)imm);
+ } else if (julong(imm) <= 0xFFFFFFFF) {
+ assert_not_delayed();
+ // lui sign-extends, so we can't use that.
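+ // lui would replicate bit 31 into the upper 32 bits, so a value with bit 31
+ // set but a zero high word is built instead as:
+ //   ori rd, R0, imm[31:16];  dsll rd, rd, 16;  ori rd, rd, imm[15:0]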
+ ori(rd, R0, julong(imm) >> 16); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm)); + } else if ((imm > 0) && is_simm16(imm >> 32)) { + // A 48-bit address + li48(rd, imm); + } else { + li64(rd, imm); + } +} + +void MacroAssembler::li32(Register reg, int imm) { + if (is_simm16(imm)) { + addiu(reg, R0, imm); + } else { + lui(reg, split_low(imm >> 16)); + if (split_low(imm)) + ori(reg, reg, split_low(imm)); + } +} + +void MacroAssembler::set64(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + } else { + lui(d, split_low(value >> 16)); + if (split_low(value)) { + ori(d, d, split_low(value)); + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + ori(d, R0, julong(value) >> 16); + dsll(d, d, 16); + if (split_low(value)) { + ori(d, d, split_low(value)); + } + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 + // 4 insts + li48(d, value); + } else { // li64 + // 6 insts + li64(d, value); + } +} + + +int MacroAssembler::insts_for_set64(jlong value) { + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + //daddiu(d, R0, value); + count++; + } else { + //lui(d, split_low(value >> 16)); + count++; + if (split_low(value)) { + //ori(d, d, split_low(value)); + count++; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + //ori(d, R0, julong(value) >> 16); + //dsll(d, d, 16); + count += 2; + if (split_low(value)) { + //ori(d, d, split_low(value)); + count++; + } + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 + // 4 insts + //li48(d, value); + count += 4; + } else { // li64 + // 6 insts + //li64(d, value); + count += 6; + } + + return count; +} + +void MacroAssembler::patchable_set48(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + count += 1; + } else { + lui(d, split_low(value >> 16)); + count += 1; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + ori(d, R0, julong(value) >> 16); + dsll(d, d, 16); + count += 2; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } else if ((value> 0) && is_simm16(value >> 32)) { // li48 + // 4 insts + li48(d, value); + count += 4; + } else { // li64 + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + nop(); + count++; + } +} + +void MacroAssembler::patchable_set32(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + count += 1; + } else { + lui(d, split_low(value >> 16)); + count += 1; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + ori(d, R0, julong(value) >> 16); + dsll(d, d, 16); + count += 2; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 3) { + nop(); + count++; + } +} + +void 
MacroAssembler::patchable_call32(Register d, jlong value) { + assert_not_delayed(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (is_simm16(value)) { + daddiu(d, R0, value); + count += 1; + } else { + lui(d, split_low(value >> 16)); + count += 1; + if (split_low(value)) { + ori(d, d, split_low(value)); + count += 1; + } + } + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 2) { + nop(); + count++; + } +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert(UseCompressedClassPointers, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + long narrowKlass = (long)Klass::encode_klass(k); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_set48(dst, narrowKlass); +} + + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert(UseCompressedOops, "should only be used for compressed header"); + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + relocate(rspec, Assembler::narrow_oop_operand); + patchable_set48(dst, oop_index); +} + +void MacroAssembler::li64(Register rd, long imm) { + assert_not_delayed(); + lui(rd, split_low(imm >> 48)); + ori(rd, rd, split_low(imm >> 32)); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm >> 16)); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm)); +} + +void MacroAssembler::li48(Register rd, long imm) { + assert_not_delayed(); + assert(is_simm16(imm >> 32), "Not a 48-bit address"); + lui(rd, imm >> 32); + ori(rd, rd, split_low(imm >> 16)); + dsll(rd, rd, 16); + ori(rd, rd, split_low(imm)); +} + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) return; + const char * b = NULL; + stringStream ss; + ss.print("verify_oop: %s: %s", reg->name(), s); + b = code_string(ss.as_string()); + pushad(); + move(A1, reg); + li(A0, (long)b); + li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); + ld(T9, AT, 0); + jalr(T9); + delayed()->nop(); + popad(); +} + + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + if (!VerifyOops) { + nop(); + return; + } + // Pass register number to verify_oop_subroutine + const char * b = NULL; + stringStream ss; + ss.print("verify_oop_addr: %s", s); + b = code_string(ss.as_string()); + + addiu(SP, SP, - 7 * wordSize); + st_ptr(T0, SP, 6 * wordSize); + st_ptr(T1, SP, 5 * wordSize); + st_ptr(RA, SP, 4 * wordSize); + st_ptr(A0, SP, 3 * wordSize); + st_ptr(A1, SP, 2 * wordSize); + st_ptr(AT, SP, 1 * wordSize); + st_ptr(T9, SP, 0); + + // addr may contain sp so we will have to adjust it based on the + // pushes that we just did. 
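+ // Seven words were pushed just above, so an SP-relative operand has moved
+ // by 7 * wordSize: what was at 0(SP) before the pushes is now at
+ // (7 * wordSize)(SP). The lea() + ld_ptr() below apply that adjustment.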
+ if (addr.uses(SP)) { + lea(A1, addr); + ld_ptr(A1, Address(A1, 7 * wordSize)); + } else { + ld_ptr(A1, addr); + } + li(A0, (long)b); + // call indirectly to solve generation ordering problem + li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); + ld_ptr(T9, AT, 0); + jalr(T9); + delayed()->nop(); + ld_ptr(T0, SP, 6* wordSize); + ld_ptr(T1, SP, 5* wordSize); + ld_ptr(RA, SP, 4* wordSize); + ld_ptr(A0, SP, 3* wordSize); + ld_ptr(A1, SP, 2* wordSize); + ld_ptr(AT, SP, 1* wordSize); + ld_ptr(T9, SP, 0* wordSize); + addiu(SP, SP, 7 * wordSize); +} + +// used registers : T0, T1 +void MacroAssembler::verify_oop_subroutine() { + // RA: ra + // A0: char* error message + // A1: oop object to verify + + Label exit, error; + // increment counter + li(T0, (long)StubRoutines::verify_oop_count_addr()); + lw(AT, T0, 0); + daddiu(AT, AT, 1); + sw(AT, T0, 0); + + // make sure object is 'reasonable' + beq(A1, R0, exit); // if obj is NULL it is ok + delayed()->nop(); + + // Check if the oop is in the right area of memory + // const int oop_mask = Universe::verify_oop_mask(); + // const int oop_bits = Universe::verify_oop_bits(); + const uintptr_t oop_mask = Universe::verify_oop_mask(); + const uintptr_t oop_bits = Universe::verify_oop_bits(); + li(AT, oop_mask); + andr(T0, A1, AT); + li(AT, oop_bits); + bne(T0, AT, error); + delayed()->nop(); + + // make sure klass is 'reasonable' + // add for compressedoops + reinit_heapbase(); + // add for compressedoops + load_klass(T0, A1); + beq(T0, R0, error); // if klass is NULL it is broken + delayed()->nop(); + // return if everything seems ok + bind(exit); + + jr(RA); + delayed()->nop(); + + // handle errors + bind(error); + pushad(); + call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + popad(); + jr(RA); + delayed()->nop(); +} + +void MacroAssembler::verify_tlab(Register t1, Register t2) { +#ifdef ASSERT + assert_different_registers(t1, t2, AT); + if (UseTLAB && VerifyOops) { + Label next, ok; + + get_thread(t1); + + ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); + sltu(AT, t2, AT); + beq(AT, R0, next); + delayed()->nop(); + + stop("assert(top >= start)"); + + bind(next); + ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); + sltu(AT, AT, t2); + beq(AT, R0, ok); + delayed()->nop(); + + stop("assert(top <= end)"); + + bind(ok); + + } +#endif +} + +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterOrConstant(value + offset); + AddressLiteral a(delayed_value_addr); + // load indirectly to solve generation ordering problem + //movptr(tmp, ExternalAddress((address) delayed_value_addr)); + //ld(tmp, a); + if (offset != 0) + daddiu(tmp,tmp, offset); + + return RegisterOrConstant(tmp); +} + +void MacroAssembler::hswap(Register reg) { + //short + //andi(reg, reg, 0xffff); + srl(AT, reg, 8); + sll(reg, reg, 24); + sra(reg, reg, 16); + orr(reg, reg, AT); +} + +void MacroAssembler::huswap(Register reg) { + dsrl(AT, reg, 8); + dsll(reg, reg, 24); + dsrl(reg, reg, 16); + orr(reg, reg, AT); + andi(reg, reg, 0xffff); +} + +// something funny to do this will only one more register AT +// 32 bits +void MacroAssembler::swap(Register reg) { + srl(AT, reg, 8); + sll(reg, reg, 24); + orr(reg, reg, AT); + //reg : 4 1 2 3 + srl(AT, AT, 16); + xorr(AT, AT, reg); + andi(AT, AT, 0xff); + //AT : 0 0 0 
1^3); + xorr(reg, reg, AT); + //reg : 4 1 2 1 + sll(AT, AT, 16); + xorr(reg, reg, AT); + //reg : 4 3 2 1 +} + +// do 32-bit CAS using MIPS64 lld/scd +// +// cas_int should only compare 32-bits of the memory value. +// However, lld/scd will do 64-bit operation, which violates the intention of cas_int. +// To simulate a 32-bit atomic operation, the value loaded with LLD should be split into +// tow halves, and only the low-32 bits is compared. If equals, the low-32 bits of newval, +// plus the high-32 bits or memory value, are stored togethor with SCD. +// +//Example: +// +// double d = 3.1415926; +// System.err.println("hello" + d); +// +// sun.misc.FloatingDecimal$1.() +// | +// `- java.util.concurrent.atomic.AtomicInteger::compareAndSet() +// +// 38 cas_int [a7a7|J] [a0|I] [a6|I] +// a0: 0xffffffffe8ea9f63 pc: 0x55647f3354 +// a6: 0x4ab325aa +// +//again: +// 0x00000055647f3c5c: lld at, 0x0(a7) ; 64-bit load, "0xe8ea9f63" +// +// 0x00000055647f3c60: sll t9, at, 0 ; t9: low-32 bits (sign extended) +// 0x00000055647f3c64: dsrl32 t8, at, 0 ; t8: high-32 bits +// 0x00000055647f3c68: dsll32 t8, t8, 0 +// 0x00000055647f3c6c: bne t9, a0, 0x00000055647f3c9c ; goto nequal +// 0x00000055647f3c70: sll zero, zero, 0 +// +// 0x00000055647f3c74: ori v1, zero, 0xffffffff ; v1: low-32 bits of newval (sign unextended) +// 0x00000055647f3c78: dsll v1, v1, 16 ; v1 = a6 & 0xFFFFFFFF; +// 0x00000055647f3c7c: ori v1, v1, 0xffffffff +// 0x00000055647f3c80: and v1, a6, v1 +// 0x00000055647f3c84: or at, t8, v1 +// 0x00000055647f3c88: scd at, 0x0(a7) +// 0x00000055647f3c8c: beq at, zero, 0x00000055647f3c5c ; goto again +// 0x00000055647f3c90: sll zero, zero, 0 +// 0x00000055647f3c94: beq zero, zero, 0x00000055647f45ac ; goto done +// 0x00000055647f3c98: sll zero, zero, 0 +//nequal: +// 0x00000055647f45a4: daddu a0, t9, zero +// 0x00000055647f45a8: daddu at, zero, zero +//done: +// + +void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) { + // MIPS64 can use ll/sc for 32-bit atomic memory access + Label done, again, nequal; + + bind(again); + + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); + ll(AT, dest); + bne(AT, c_reg, nequal); + delayed()->nop(); + + move(AT, x_reg); + sc(AT, dest); + beq(AT, R0, again); + delayed()->nop(); + b(done); + delayed()->nop(); + + // not xchged + bind(nequal); + sync(); + move(c_reg, AT); + move(AT, R0); + + bind(done); +} + +void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) { + Label done, again, nequal; + + bind(again); + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); + lld(AT, dest); + bne(AT, c_reg, nequal); + delayed()->nop(); + + move(AT, x_reg); + scd(AT, dest); + beq(AT, R0, again); + delayed()->nop(); + b(done); + delayed()->nop(); + + // not xchged + bind(nequal); + sync(); + move(c_reg, AT); + move(AT, R0); + + bind(done); +} + +void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { + Label done, again, nequal; + + Register x_reg = x_regLo; + dsll32(x_regHi, x_regHi, 0); + dsll32(x_regLo, x_regLo, 0); + dsrl32(x_regLo, x_regLo, 0); + orr(x_reg, x_regLo, x_regHi); + + Register c_reg = c_regLo; + dsll32(c_regHi, c_regHi, 0); + dsll32(c_regLo, c_regLo, 0); + dsrl32(c_regLo, c_regLo, 0); + orr(c_reg, c_regLo, c_regHi); + + bind(again); + + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); + lld(AT, dest); + bne(AT, c_reg, nequal); + delayed()->nop(); + + 
//move(AT, x_reg); + daddu(AT, x_reg, R0); + scd(AT, dest); + beq(AT, R0, again); + delayed()->nop(); + b(done); + delayed()->nop(); + + // not xchged + bind(nequal); + sync(); + //move(c_reg, AT); + //move(AT, R0); + daddu(c_reg, AT, R0); + daddu(AT, R0, R0); + bind(done); +} + +// be sure the three register is different +void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + assert_different_registers(tmp, fs, ft); + div_s(tmp, fs, ft); + trunc_l_s(tmp, tmp); + cvt_s_l(tmp, tmp); + mul_s(tmp, tmp, ft); + sub_s(fd, fs, tmp); +} + +// be sure the three register is different +void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { + assert_different_registers(tmp, fs, ft); + div_d(tmp, fs, ft); + trunc_l_d(tmp, tmp); + cvt_d_l(tmp, tmp); + mul_d(tmp, tmp, ft); + sub_d(fd, fs, tmp); +} + +// Fast_Lock and Fast_Unlock used by C2 + +// Because the transitions from emitted code to the runtime +// monitorenter/exit helper stubs are so slow it's critical that +// we inline both the stack-locking fast-path and the inflated fast path. +// +// See also: cmpFastLock and cmpFastUnlock. +// +// What follows is a specialized inline transliteration of the code +// in slow_enter() and slow_exit(). If we're concerned about I$ bloat +// another option would be to emit TrySlowEnter and TrySlowExit methods +// at startup-time. These methods would accept arguments as +// (Obj, Self, box, Scratch) and return success-failure +// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply +// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. +// In practice, however, the # of lock sites is bounded and is usually small. +// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer +// if the processor uses simple bimodal branch predictors keyed by EIP +// Since the helper routines would be called from multiple synchronization +// sites. +// +// An even better approach would be write "MonitorEnter()" and "MonitorExit()" +// in java - using j.u.c and unsafe - and just bind the lock and unlock sites +// to those specialized methods. That'd give us a mostly platform-independent +// implementation that the JITs could optimize and inline at their pleasure. +// Done correctly, the only time we'd need to cross to native could would be +// to park() or unpark() threads. We'd also need a few more unsafe operators +// to (a) prevent compiler-JIT reordering of non-volatile accesses, and +// (b) explicit barriers or fence operations. +// +// TODO: +// +// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). +// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. +// Given TLAB allocation, Self is usually manifested in a register, so passing it into +// the lock operators would typically be faster than reifying Self. +// +// * Ideally I'd define the primitives as: +// fast_lock (nax Obj, nax box, tmp, nax scr) where box, tmp and scr are KILLED. +// fast_unlock (nax Obj, box, nax tmp) where box and tmp are KILLED +// Unfortunately ADLC bugs prevent us from expressing the ideal form. +// Instead, we're stuck with a rather awkward and brittle register assignments below. +// Furthermore the register assignments are overconstrained, possibly resulting in +// sub-optimal code near the synchronization site. +// +// * Eliminate the sp-proximity tests and just use "== Self" tests instead. 
+// Alternately, use a better sp-proximity test. +// +// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. +// Either one is sufficient to uniquely identify a thread. +// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. +// +// * Intrinsify notify() and notifyAll() for the common cases where the +// object is locked by the calling thread but the waitlist is empty. +// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). +// +// * use jccb and jmpb instead of jcc and jmp to improve code density. +// But beware of excessive branch density on AMD Opterons. +// +// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success +// or failure of the fast-path. If the fast-path fails then we pass +// control to the slow-path, typically in C. In Fast_Lock and +// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 +// will emit a conditional branch immediately after the node. +// So we have branches to branches and lots of ICC.ZF games. +// Instead, it might be better to have C2 pass a "FailureLabel" +// into Fast_Lock and Fast_Unlock. In the case of success, control +// will drop through the node. ICC.ZF is undefined at exit. +// In the case of failure, the node will branch directly to the +// FailureLabel + + +// obj: object to lock +// box: on-stack box address (displaced header location) - KILLED +// tmp: tmp -- KILLED +// scr: tmp -- KILLED +void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) { + + // Ensure the register assignents are disjoint + guarantee (objReg != boxReg, "") ; + guarantee (objReg != tmpReg, "") ; + guarantee (objReg != scrReg, "") ; + guarantee (boxReg != tmpReg, "") ; + guarantee (boxReg != scrReg, "") ; + + + block_comment("FastLock"); + if (PrintBiasedLockingStatistics) { + push(tmpReg); + atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, AT, tmpReg); + pop(tmpReg); + } + + if (EmitSync & 1) { + move(AT, 0x0); + return; + } else + if (EmitSync & 2) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); + } + + ld(tmpReg, Address(objReg, 0)) ; // fetch markword + ori(tmpReg, tmpReg, 0x1); + sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + + cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg + bne(AT, R0, DONE_LABEL); + delayed()->nop(); + + // Recursive locking + dsubu(tmpReg, tmpReg, SP); + li(AT, (7 - os::vm_page_size() )); + andr(tmpReg, tmpReg, AT); + sd(tmpReg, Address(boxReg, 0)); + bind(DONE_LABEL) ; + } else { + // Possible cases that we'll encounter in fast_lock + // ------------------------------------------------ + // * Inflated + // -- unlocked + // -- Locked + // = by self + // = by other + // * biased + // -- by Self + // -- by other + // * neutral + // * stack-locked + // -- by self + // = sp-proximity test hits + // = sp-proximity test generates false-negative + // -- by other + // + + Label IsInflated, DONE_LABEL, PopDone ; + + // TODO: optimize away redundant LDs of obj->mark and improve the markword triage + // order to reduce the number of conditional branches in the most common cases. + // Beware -- there's a subtle invariant that fetch of the markword + // at [FETCH], below, will never observe a biased encoding (*101b). + // If this invariant is not held we risk exclusion (safety) failure. 
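+ //
+ // The resulting fast path is, in outline:
+ //   mark = obj->mark();
+ //   if (mark & monitor_value) goto IsInflated;        // inflated monitor
+ //   box->displaced_header = mark | unlocked_value;    // anticipate success
+ //   if (CAS(&obj->mark, mark | unlocked_value, box)) goto DONE;  // stack-locked
+ //   // otherwise: recursive stack-lock iff (mark - SP) stays within one page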
+ if (UseBiasedLocking && !UseOptoBiasInlining) { + biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); + } + + ld(tmpReg, Address(objReg, 0)) ; //Fetch the markword of the object. + andi(AT, tmpReg, markOopDesc::monitor_value); + bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias + delayed()->nop(); + + // Attempt stack-locking ... + ori (tmpReg, tmpReg, markOopDesc::unlocked_value); + sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS + //if (os::is_MP()) { + // sync(); + //} + + cmpxchg(boxReg, Address(objReg, 0), tmpReg); // Updates tmpReg + //AT == 1: unlocked + + if (PrintBiasedLockingStatistics) { + Label L; + beq(AT, R0, L); + delayed()->nop(); + push(T0); + push(T1); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); + pop(T1); + pop(T0); + bind(L); + } + bne(AT, R0, DONE_LABEL); + delayed()->nop(); + + // Recursive locking + // The object is stack-locked: markword contains stack pointer to BasicLock. + // Locked by current thread if difference with current SP is less than one page. + dsubu(tmpReg, tmpReg, SP); + li(AT, 7 - os::vm_page_size() ); + andr(tmpReg, tmpReg, AT); + sd(tmpReg, Address(boxReg, 0)); + if (PrintBiasedLockingStatistics) { + Label L; + // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ + bne(tmpReg, R0, L); + delayed()->nop(); + push(T0); + push(T1); + atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, T0, T1); + pop(T1); + pop(T0); + bind(L); + } + sltiu(AT, tmpReg, 1); // AT = (tmpReg == 0) ? 1 : 0 + + b(DONE_LABEL) ; + delayed()->nop(); + + bind(IsInflated) ; + // The object's monitor m is unlocked iff m->owner == NULL, + // otherwise m->owner may contain a thread or a stack address. + + // TODO: someday avoid the ST-before-CAS penalty by + // relocating (deferring) the following ST. + // We should also think about trying a CAS without having + // fetched _owner. If the CAS is successful we may + // avoid an RTO->RTS upgrade on the $line. + // Without cast to int32_t a movptr will destroy r10 which is typically obj + li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); + sd(AT, Address(boxReg, 0)); + + move(boxReg, tmpReg) ; + ld(tmpReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; + // if (m->owner != 0) => AT = 0, goto slow path. + move(AT, R0); + bne(tmpReg, R0, DONE_LABEL); + delayed()->nop(); + +#ifndef OPT_THREAD + get_thread (TREG) ; +#endif + // It's inflated and appears unlocked + //if (os::is_MP()) { + // sync(); + //} + cmpxchg(TREG, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), tmpReg) ; + // Intentional fall-through into DONE_LABEL ... + + + // DONE_LABEL is a hot target - we'd really like to place it at the + // start of cache line by padding with NOPs. + // See the AMD and Intel software optimization manuals for the + // most efficient "long" NOP encodings. + // Unfortunately none of our alignment mechanisms suffice. + bind(DONE_LABEL); + + // At DONE_LABEL the AT is set as follows ... + // Fast_Unlock uses the same protocol. + // AT == 1 -> Success + // AT == 0 -> Failure - force control through the slow-path + + // Avoid branch-to-branch on AMD processors + // This appears to be superstition. + if (EmitSync & 32) nop() ; + + } +} + +// obj: object to unlock +// box: box address (displaced header location), killed. +// tmp: killed tmp; cannot be obj nor box. +// +// Some commentary on balanced locking: +// +// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. 
+// Methods that don't have provably balanced locking are forced to run in the +// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. +// The interpreter provides two properties: +// I1: At return-time the interpreter automatically and quietly unlocks any +// objects acquired the current activation (frame). Recall that the +// interpreter maintains an on-stack list of locks currently held by +// a frame. +// I2: If a method attempts to unlock an object that is not held by the +// the frame the interpreter throws IMSX. +// +// Lets say A(), which has provably balanced locking, acquires O and then calls B(). +// B() doesn't have provably balanced locking so it runs in the interpreter. +// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O +// is still locked by A(). +// +// The only other source of unbalanced locking would be JNI. The "Java Native Interface: +// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter +// should not be unlocked by "normal" java-level locking and vice-versa. The specification +// doesn't specify what will occur if a program engages in such mixed-mode locking, however. + +void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) { + + guarantee (objReg != boxReg, "") ; + guarantee (objReg != tmpReg, "") ; + guarantee (boxReg != tmpReg, "") ; + + block_comment("FastUnlock"); + + + if (EmitSync & 4) { + // Disable - inhibit all inlining. Force control through the slow-path + move(AT, 0x0); + return; + } else + if (EmitSync & 8) { + Label DONE_LABEL ; + if (UseBiasedLocking) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + // classic stack-locking code ... + ld(tmpReg, Address(boxReg, 0)) ; + beq(tmpReg, R0, DONE_LABEL) ; + move(AT, 0x1); // delay slot + + cmpxchg(tmpReg, Address(objReg, 0), boxReg); + bind(DONE_LABEL); + } else { + Label DONE_LABEL, Stacked, CheckSucc, Inflated ; + + // Critically, the biased locking test must have precedence over + // and appear before the (box->dhw == 0) recursive stack-lock test. + if (UseBiasedLocking && !UseOptoBiasInlining) { + biased_locking_exit(objReg, tmpReg, DONE_LABEL); + } + + ld(AT, Address(boxReg, 0)) ; // Examine the displaced header + beq(AT, R0, DONE_LABEL) ; // 0 indicates recursive stack-lock + delayed()->daddiu(AT, R0, 0x1); + + ld(tmpReg, Address(objReg, 0)) ; // Examine the object's markword + andi(AT, tmpReg, markOopDesc::monitor_value) ; // Inflated? + beq(AT, R0, Stacked) ; // Inflated? + delayed()->nop(); + + bind(Inflated) ; + // It's inflated. + // Despite our balanced locking property we still check that m->_owner == Self + // as java routines or native JNI code called by this thread might + // have released the lock. + // Refer to the comments in synchronizer.cpp for how we might encode extra + // state in _succ so we can avoid fetching EntryList|cxq. + // + // I'd like to add more cases in fast_lock() and fast_unlock() -- + // such as recursive enter and exit -- but we have to be wary of + // I$ bloat, T$ effects and BP$ effects. + // + // If there's no contention try a 1-0 exit. That is, exit without + // a costly MEMBAR or CAS. See synchronizer.cpp for details on how + // we detect and recover from the race that the 1-0 exit admits. + // + // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier + // before it STs null into _owner, releasing the lock. 
Updates + // to data protected by the critical section must be visible before + // we drop the lock (and thus before any other thread could acquire + // the lock and observe the fields protected by the lock). +#ifndef OPT_THREAD + get_thread (TREG) ; +#endif + + // It's inflated + ld(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; + xorr(boxReg, boxReg, TREG); + + ld(AT, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ; + orr(boxReg, boxReg, AT); + + move(AT, R0); + bne(boxReg, R0, DONE_LABEL); + delayed()->nop(); + + ld(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; + ld(AT, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; + orr(boxReg, boxReg, AT); + + move(AT, R0); + bne(boxReg, R0, DONE_LABEL); + delayed()->nop(); + + sync(); + sd(R0, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; + move(AT, 0x1); + b(DONE_LABEL); + delayed()->nop(); + + bind (Stacked); + ld(tmpReg, Address(boxReg, 0)) ; + //if (os::is_MP()) { sync(); } + cmpxchg(tmpReg, Address(objReg, 0), boxReg); + + if (EmitSync & 65536) { + bind (CheckSucc); + } + + bind(DONE_LABEL); + + // Avoid branch to branch on AMD processors + if (EmitSync & 32768) { nop() ; } + } +} + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + + +void MacroAssembler::verify_FPU(int stack_depth, const char* s) { + //Unimplemented(); +} + +Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; +Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; + +//In MIPS64, F0~23 are all caller-saved registers +FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; + +// We preserve all caller-saved register +void MacroAssembler::pushad(){ + int i; + + // Fixed-point registers + int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) + { + sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) + { + sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } +}; + +void MacroAssembler::popad(){ + int i; + + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) + { + ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); + for (i = 0; i < len; i++) + { + ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); +}; + +// We preserve all caller-saved register except V0 +void MacroAssembler::pushad_except_v0() { + int i; + + // Fixed-point registers + int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + + // Floating-point registers + len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + daddiu(SP, SP, -1 * len * wordSize); + for (i = 0; i < len; i++) { + sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * 
wordSize); + } +} + +void MacroAssembler::popad_except_v0() { + int i; + + // Floating-point registers + int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); + for (i = 0; i < len; i++) { + ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); + + // Fixed-point registers + len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); + for (i = 0; i < len; i++) { + ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); + } + daddiu(SP, SP, len * wordSize); +} + +void MacroAssembler::push2(Register reg1, Register reg2) { + daddiu(SP, SP, -16); + sd(reg1, SP, 8); + sd(reg2, SP, 0); +} + +void MacroAssembler::pop2(Register reg1, Register reg2) { + ld(reg1, SP, 8); + ld(reg2, SP, 0); + daddiu(SP, SP, 16); +} + +// for UseCompressedOops Option +void MacroAssembler::load_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else + ld(dst, src, oopDesc::klass_offset_in_bytes()); +} + +void MacroAssembler::store_klass(Register dst, Register src) { + if(UseCompressedClassPointers){ + encode_klass_not_null(src); + sw(src, dst, oopDesc::klass_offset_in_bytes()); + } else { + sd(src, dst, oopDesc::klass_offset_in_bytes()); + } +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ld(dst, Address(dst, Klass::prototype_header_offset())); +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedClassPointers) { + sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); + } +} + +void MacroAssembler::load_heap_oop(Register dst, Address src) { + if(UseCompressedOops){ + lwu(dst, src); + decode_heap_oop(dst); + } else { + ld(dst, src); + } +} + +void MacroAssembler::store_heap_oop(Address dst, Register src){ + if(UseCompressedOops){ + assert(!dst.uses(src), "not enough registers"); + encode_heap_oop(src); + sw(src, dst); + } else { + sd(src, dst); + } +} + +void MacroAssembler::store_heap_oop_null(Address dst){ + if(UseCompressedOops){ + sw(R0, dst); + } else { + sd(R0, dst); + } +} + +#ifdef ASSERT +void MacroAssembler::verify_heapbase(const char* msg) { + assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); +} +#endif + + +// Algorithm must match oop.inline.hpp encode_heap_oop. 
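+// Roughly: narrow = (oop == NULL) ? 0 : (oop - narrow_oop_base) >> narrow_oop_shift.
+// The movz below substitutes the heap base for a NULL register before the
+// subtraction, so NULL still encodes to 0.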
+void MacroAssembler::encode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(r, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } + return; + } + + movz(r, S5_heapbase, r); + dsubu(r, r, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); +#endif + verify_oop(src, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + dsrl(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) move(dst, src); + } + } else { + if (dst == src) { + movz(dst, S5_heapbase, dst); + dsubu(dst, dst, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } + } else { + dsubu(dst, src, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } + movz(dst, R0, src); + } + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register r) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(r, R0, ok); + delayed()->nop(); + stop("null oop passed to encode_heap_oop_not_null"); + bind(ok); + } +#endif + verify_oop(r, "broken oop in encode_heap_oop_not_null"); + if (Universe::narrow_oop_base() != NULL) { + dsubu(r, r, S5_heapbase); + } + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(r, LogMinObjAlignmentInBytes); + } + +} + +void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { + assert (UseCompressedOops, "should be compressed"); +#ifdef ASSERT + if (CheckCompressedOops) { + Label ok; + bne(src, R0, ok); + delayed()->nop(); + stop("null oop passed to encode_heap_oop_not_null2"); + bind(ok); + } +#endif + verify_oop(src, "broken oop in encode_heap_oop_not_null2"); + + if (Universe::narrow_oop_base() != NULL) { + dsubu(dst, src, S5_heapbase); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shr(dst, LogMinObjAlignmentInBytes); + } + } else { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + dsrl(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) move(dst, src); + } + } +} + +void MacroAssembler::decode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + } + } else { + move(AT, r); + if 
(Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + } + daddu(r, r, S5_heapbase); + movz(r, R0, AT); + } + verify_oop(r, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (dst != src) nop(); // DON'T DELETE THIS GUY. + dsll(dst, src, LogMinObjAlignmentInBytes); + } else { + if (dst != src) move(dst, src); + } + } else { + if (dst == src) { + move(AT, dst); + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(dst, LogMinObjAlignmentInBytes); + } + daddu(dst, dst, S5_heapbase); + movz(dst, R0, AT); + } else { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + dsll(dst, src, LogMinObjAlignmentInBytes); + daddu(dst, dst, S5_heapbase); + } else { + daddu(dst, src, S5_heapbase); + } + movz(dst, R0, src); + } + } + verify_oop(dst, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + // Note: it will change flags + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shl(r, LogMinObjAlignmentInBytes); + if (Universe::narrow_oop_base() != NULL) { + daddu(r, r, S5_heapbase); + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. 
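+ //
+ // Decoding is the inverse of encoding: oop = (narrow << shift) + base, where
+ // adding the base is safe here because the narrow oop is known to be
+ // non-NULL, so no movz-based NULL special case is needed in this variant.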
+ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes == Address::times_8) { + dsll(dst, src, LogMinObjAlignmentInBytes); + daddu(dst, dst, S5_heapbase); + } else { + dsll(dst, src, LogMinObjAlignmentInBytes); + if (Universe::narrow_oop_base() != NULL) { + daddu(dst, dst, S5_heapbase); + } + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + if (dst != src) { + move(dst, src); + } + } +} + +void MacroAssembler::encode_klass_not_null(Register r) { + if (Universe::narrow_klass_base() != NULL) { + assert(r != AT, "Encoding a klass in AT"); + set64(AT, (int64_t)Universe::narrow_klass_base()); + dsubu(r, r, AT); + } + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(r, LogKlassAlignmentInBytes); + } +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + if (dst == src) { + encode_klass_not_null(src); + } else { + if (Universe::narrow_klass_base() != NULL) { + set64(dst, (int64_t)Universe::narrow_klass_base()); + dsubu(dst, src, dst); + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shr(dst, LogKlassAlignmentInBytes); + } + } else { + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + dsrl(dst, src, LogKlassAlignmentInBytes); + } else { + move(dst, src); + } + } + } +} + +// Function instr_size_for_decode_klass_not_null() counts the instructions +// generated by decode_klass_not_null(register r) and reinit_heapbase(), +// when (Universe::heap() != NULL). Hence, if the instructions they +// generate change, then this method needs to be updated. +int MacroAssembler::instr_size_for_decode_klass_not_null() { + assert (UseCompressedClassPointers, "only for compressed klass ptrs"); + if (Universe::narrow_klass_base() != NULL) { + // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). + return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); + } else { + // longest load decode klass function, mov64, leaq + return (Universe::narrow_klass_shift() == 0 ? 4 * 0 : 4 * 1); + } +} + +void MacroAssembler::decode_klass_not_null(Register r) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + assert(r != AT, "Decoding a klass in AT"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shl(r, LogKlassAlignmentInBytes); + } + if (Universe::narrow_klass_base() != NULL) { + set64(AT, (int64_t)Universe::narrow_klass_base()); + daddu(r, r, AT); + //Not neccessary for MIPS at all. + //reinit_heapbase(); + } +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { + assert (UseCompressedClassPointers, "should only be used for compressed headers"); + + if (dst == src) { + decode_klass_not_null(dst); + } else { + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. 
+ // Also do not verify_oop as this is called by verify_oop. + set64(dst, (int64_t)Universe::narrow_klass_base()); + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + dsll(AT, src, Address::times_8); + daddu(dst, dst, AT); + } else { + daddu(dst, src, dst); + } + } +} + +void MacroAssembler::incrementl(Register reg, int value) { + if (value == min_jint) { + move(AT, value); + addu32(reg, reg, AT); + return; + } + if (value < 0) { decrementl(reg, -value); return; } + if (value == 0) { ; return; } + + move(AT, value); + addu32(reg, reg, AT); +} + +void MacroAssembler::decrementl(Register reg, int value) { + if (value == min_jint) { + move(AT, value); + subu32(reg, reg, AT); + return; + } + if (value < 0) { incrementl(reg, -value); return; } + if (value == 0) { ; return; } + + move(AT, value); + subu32(reg, reg, AT); +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops || UseCompressedClassPointers) { + if (Universe::heap() != NULL) { + if (Universe::narrow_oop_base() == NULL) { + move(S5_heapbase, R0); + } else { + set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); + } + } else { + set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); + ld(S5_heapbase, S5_heapbase, 0); + } + } +} + +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { +//implement ind gen_subtype_check + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + +SkipIfEqual::SkipIfEqual( + MacroAssembler* masm, const bool* flag_addr, bool value) { + _masm = masm; + _masm->li(AT, (address)flag_addr); + _masm->lb(AT, AT, 0); + _masm->addiu(AT, AT, -value); + _masm->beq(AT, R0, _label); + _masm->delayed()->nop(); +} +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. 
+ // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + beq(sub_klass, super_klass, *L_success); + delayed()->nop(); + // Check the supertype display: + if (must_load_sco) { + lwu(temp_reg, super_klass, sco_offset); + super_check_offset = RegisterOrConstant(temp_reg); + } + daddu(AT, sub_klass, super_check_offset.register_or_noreg()); + ld(AT, AT, super_check_offset.constant_or_zero()); + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) + // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + beq(super_klass, AT, *L_success); + delayed()->nop(); + addiu(AT, super_check_offset.as_register(), -sc_offset); + if (L_failure == &L_fallthrough) { + beq(AT, R0, *L_slow_path); + delayed()->nop(); + } else { + bne_far(AT, R0, *L_failure); + delayed()->nop(); + b(*L_slow_path); + delayed()->nop(); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + beq(super_klass, AT, *L_success); + delayed()->nop(); + } else { + bne(super_klass, AT, *L_slow_path); + delayed()->nop(); + b(*L_success); + delayed()->nop(); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + beq(super_klass, AT, *L_success); + delayed()->nop(); + } else { + bne_far(super_klass, AT, *L_failure); + delayed()->nop(); + b(*L_success); + delayed()->nop(); + } + } + + bind(L_fallthrough); + +} + + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + if (temp2_reg == noreg) + temp2_reg = TSR; + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); + Address super_cache_addr( sub_klass, sc_offset); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + // The repne_scan instruction uses fixed registers, which we must spill. + // Don't worry too much about pre-existing connections with the input regs. 
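+  // Note: the repne_scan mentioned above is an x86-ism with no MIPS/LoongArch
+  // counterpart; the scan that follows is an explicit beq/daddiu loop over the
+  // secondary_supers array, counting elements down in temp2_reg.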
+ +#ifndef PRODUCT + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; + ExternalAddress pst_counter_addr((address) pst_counter); +#endif //PRODUCT + + // We will consult the secondary-super array. + ld(temp_reg, secondary_supers_addr); + // Load the array length. + lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); + // Skip to start of data. + daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); + + // OpenJDK8 never compresses klass pointers in secondary-super array. + Label Loop, subtype; + bind(Loop); + beq(temp2_reg, R0, *L_failure); + delayed()->nop(); + ld(AT, temp_reg, 0); + beq(AT, super_klass, subtype); + delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); + b(Loop); + delayed()->daddiu(temp2_reg, temp2_reg, -1); + + bind(subtype); + sd(super_klass, super_cache_addr); + if (L_success != &L_fallthrough) { + b(*L_success); + delayed()->nop(); + } + + // Success. Cache the super we found and proceed in triumph. +#undef IS_A_TEMP + + bind(L_fallthrough); +} + +void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { + ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); + sd(R0, Address(java_thread, JavaThread::vm_result_offset())); + verify_oop(oop_result, "broken oop in call_VM_base"); +} + +void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { + ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); + sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); +} + +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). + int stackElementSize = Interpreter::stackElementSize; + int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); +#ifdef ASSERT + int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); + assert(offset1 - offset == stackElementSize, "correct arithmetic"); +#endif + Register scale_reg = NOREG; + Address::ScaleFactor scale_factor = Address::no_scale; + if (arg_slot.is_constant()) { + offset += arg_slot.as_constant() * stackElementSize; + } else { + scale_reg = arg_slot.as_register(); + scale_factor = Address::times_8; + } + // We don't push RA on stack in prepare_invoke. + // offset += wordSize; // return PC is on stack + if(scale_reg==NOREG) return Address(SP, offset); + else { + dsll(scale_reg, scale_reg, scale_factor); + daddu(scale_reg, SP, scale_reg); + return Address(scale_reg, offset); + } +} + +SkipIfEqual::~SkipIfEqual() { + _masm->bind(_label); +} + +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { + switch (size_in_bytes) { + case 8: ld(dst, src); break; + case 4: lw(dst, src); break; + case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; + case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { + switch (size_in_bytes) { + case 8: sd(src, dst); break; + case 4: sw(src, dst); break; + case 2: sh(src, dst); break; + case 1: sb(src, dst); break; + default: ShouldNotReachHere(); + } +} + +// Look up the method for a megamorphic invokeinterface call. +// The target method is determined by . +// The receiver klass is in recv_klass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. 
+void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& L_no_such_interface, + bool return_method) { + assert_different_registers(recv_klass, intf_klass, scan_temp, AT); + assert_different_registers(method_result, intf_klass, scan_temp, AT); + assert(recv_klass != method_result || !return_method, + "recv_klass can be destroyed when method isn't needed"); + + assert(itable_index.is_constant() || itable_index.as_register() == method_result, + "caller must use same register for non-constant itable index as for method"); + + // Compute start of first itableOffsetEntry (which is at the end of the vtable) + int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; + int itentry_off = itableMethodEntry::method_offset_in_bytes(); + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size() * wordSize; + Address::ScaleFactor times_vte_scale = Address::times_ptr; + assert(vte_size == wordSize, "else adjust times_vte_scale"); + + lw(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); + + // %%% Could store the aligned, prescaled offset in the klassoop. + dsll(scan_temp, scan_temp, times_vte_scale); + daddu(scan_temp, recv_klass, scan_temp); + daddiu(scan_temp, scan_temp, vtable_base); + if (HeapWordsPerLong > 1) { + // Round up to align_object_offset boundary + // see code for InstanceKlass::start_of_itable! + round_to(scan_temp, BytesPerLong); + } + + if (return_method) { + // Adjust recv_klass by scaled itable_index, so we can free itable_index. + assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + if (itable_index.is_constant()) { + set64(AT, (int)itable_index.is_constant()); + dsll(AT, AT, (int)Address::times_ptr); + } else { + dsll(AT, itable_index.as_register(), (int)Address::times_ptr); + } + daddu(AT, AT, recv_klass); + daddiu(recv_klass, AT, itentry_off); + } + + Label search, found_method; + + for (int peel = 1; peel >= 0; peel--) { + ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + + if (peel) { + beq(intf_klass, method_result, found_method); + delayed()->nop(); + } else { + bne(intf_klass, method_result, search); + delayed()->nop(); + // (invert the test to fall through to found_method...) + } + + if (!peel) break; + + bind(search); + + // Check that the previous entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + beq(method_result, R0, L_no_such_interface); + delayed()->nop(); + daddiu(scan_temp, scan_temp, scan_step); + } + + bind(found_method); + + if (return_method) { + // Got a hit. 
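+    // The matched itableOffsetEntry's offset field is the byte distance from
+    // the start of the klass to that interface's block of itableMethodEntries;
+    // recv_klass was pre-biased above by the scaled itable_index plus
+    // itentry_off, so one indexed load yields the Method*.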
+ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + if (UseLEXT1) { + gsldx(method_result, recv_klass, scan_temp, 0); + } else { + daddu(AT, recv_klass, scan_temp); + ld(method_result, AT, 0); + } + } +} + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + Register tmp = GP; + push(tmp); + + if (vtable_index.is_constant()) { + assert_different_registers(recv_klass, method_result, tmp); + } else { + assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); + } + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); + if (vtable_index.is_constant()) { + set64(AT, vtable_index.as_constant()); + dsll(AT, AT, (int)Address::times_ptr); + } else { + dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); + } + set64(tmp, base + vtableEntry::method_offset_in_bytes()); + daddu(tmp, tmp, AT); + daddu(tmp, tmp, recv_klass); + ld(method_result, tmp, 0); + + pop(tmp); +} + +void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { + switch (type) { + case T_LONG: + st_ptr(src_reg, tmp_reg, disp); + break; + case T_ARRAY: + case T_OBJECT: + if (UseCompressedOops && !wide) { + sw(src_reg, tmp_reg, disp); + } else { + st_ptr(src_reg, tmp_reg, disp); + } + break; + case T_ADDRESS: + st_ptr(src_reg, tmp_reg, disp); + break; + case T_INT: + sw(src_reg, tmp_reg, disp); + break; + case T_CHAR: + case T_SHORT: + sh(src_reg, tmp_reg, disp); + break; + case T_BYTE: + case T_BOOLEAN: + sb(src_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type, wide); + } +} + +void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { + switch (type) { + case T_DOUBLE: + sdc1(src_reg, tmp_reg, disp); + break; + case T_FLOAT: + swc1(src_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); + } +} + +void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { + switch (type) { + case T_LONG: + ld_ptr(dst_reg, tmp_reg, disp); + break; + case T_ARRAY: + case T_OBJECT: + if (UseCompressedOops && !wide) { + lwu(dst_reg, tmp_reg, disp); + } else { + ld_ptr(dst_reg, tmp_reg, disp); + } + break; + case T_ADDRESS: + if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { + lwu(dst_reg, tmp_reg, disp); + } else { + ld_ptr(dst_reg, tmp_reg, disp); + } + break; + case T_INT: + lw(dst_reg, tmp_reg, disp); + break; + case T_CHAR: + lhu(dst_reg, tmp_reg, disp); + break; + case T_SHORT: + lh(dst_reg, tmp_reg, disp); + break; + case T_BYTE: + case T_BOOLEAN: + lb(dst_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { + int code_offset = 0; + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type, wide); + } + + return code_offset; +} + +void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { + switch (type) { + case T_DOUBLE: + ldc1(dst_reg, tmp_reg, disp); + break; + case T_FLOAT: + lwc1(dst_reg, tmp_reg, disp); + break; + default: + ShouldNotReachHere(); + } +} + +int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { + int code_offset = 0; + Register tmp_reg = T9; + Register index_reg = addr.index(); + if (index_reg == NOREG) { + tmp_reg = NOREG; + } + + int scale = addr.scale(); + if (tmp_reg != NOREG && scale >= 0) { + dsll(tmp_reg, index_reg, scale); + } + + int disp = addr.disp(); + bool disp_is_simm16 = true; + if (!Assembler::is_simm16(disp)) { + disp_is_simm16 = false; + } + + Register base_reg = addr.base(); + if (tmp_reg != NOREG) { + assert_different_registers(tmp_reg, base_reg, index_reg); + } + + if (tmp_reg != NOREG) { + daddu(tmp_reg, base_reg, tmp_reg); + if (!disp_is_simm16) { + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); + } else { + if (!disp_is_simm16) { + tmp_reg = T9; + assert_different_registers(tmp_reg, base_reg); + move(tmp_reg, disp); + daddu(tmp_reg, base_reg, tmp_reg); + } + code_offset = offset(); + load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); + } + + return code_offset; +} + +void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { + const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); + STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code + // The inverted mask is sign-extended + move(AT, inverted_jweak_mask); + andr(possibly_jweak, AT, possibly_jweak); +} + +void MacroAssembler::resolve_jobject(Register value, + Register thread, + Register tmp) { + assert_different_registers(value, thread, tmp); + Label done, not_weak; + beq(value, R0, done); // Use NULL as-is. + delayed()->nop(); + move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. + andr(AT, value, AT); + beq(AT, R0, not_weak); + delayed()->nop(); + // Resolve jweak. + ld(value, value, -JNIHandles::weak_tag_value); + verify_oop(value); + #if INCLUDE_ALL_GCS + if (UseG1GC) { + g1_write_barrier_pre(noreg /* obj */, + value /* pre_val */, + thread /* thread */, + tmp /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } + #endif // INCLUDE_ALL_GCS + b(done); + delayed()->nop(); + bind(not_weak); + // Resolve (untagged) jobject. 
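+  // A strong jobject carries no tag bit, so the oop is loaded directly from the
+  // handle; the weak path above subtracted weak_tag_value before its load and,
+  // under G1, issued a SATB pre-barrier to keep the referent alive.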
+ ld(value, value, 0); + verify_oop(value); + bind(done); +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp, + bool is_signed) { + switch (cmp) { + case EQ: + subu(AT, op1, op2); + movz(dst, src, AT); + break; + + case NE: + subu(AT, op1, op2); + movn(dst, src, AT); + break; + + case GT: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + movn(dst, src, AT); + break; + + case GE: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + movz(dst, src, AT); + break; + + case LT: + if (is_signed) { + slt(AT, op1, op2); + } else { + sltu(AT, op1, op2); + } + movn(dst, src, AT); + break; + + case LE: + if (is_signed) { + slt(AT, op2, op1); + } else { + sltu(AT, op2, op1); + } + movz(dst, src, AT); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + CMCompare cmp, + bool is_float) { + switch(cmp) { + case EQ: + if (is_float) { + c_eq_s(op1, op2); + } else { + c_eq_d(op1, op2); + } + movt(dst, src); + break; + + case NE: + if (is_float) { + c_eq_s(op1, op2); + } else { + c_eq_d(op1, op2); + } + movf(dst, src); + break; + + case GT: + if (is_float) { + c_ule_s(op1, op2); + } else { + c_ule_d(op1, op2); + } + movf(dst, src); + break; + + case GE: + if (is_float) { + c_ult_s(op1, op2); + } else { + c_ult_d(op1, op2); + } + movf(dst, src); + break; + + case LT: + if (is_float) { + c_ult_s(op1, op2); + } else { + c_ult_d(op1, op2); + } + movt(dst, src); + break; + + case LE: + if (is_float) { + c_ule_s(op1, op2); + } else { + c_ule_d(op1, op2); + } + movt(dst, src); + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp, + bool is_float) { + switch(cmp) { + case EQ: + if (!is_float) { + c_eq_d(op1, op2); + movt_d(dst, src); + } else { + c_eq_s(op1, op2); + movt_s(dst, src); + } + break; + + case NE: + if (!is_float) { + c_eq_d(op1, op2); + movf_d(dst, src); + } else { + c_eq_s(op1, op2); + movf_s(dst, src); + } + break; + + case GT: + if (!is_float) { + c_ule_d(op1, op2); + movf_d(dst, src); + } else { + c_ule_s(op1, op2); + movf_s(dst, src); + } + break; + + case GE: + if (!is_float) { + c_ult_d(op1, op2); + movf_d(dst, src); + } else { + c_ult_s(op1, op2); + movf_s(dst, src); + } + break; + + case LT: + if (!is_float) { + c_ult_d(op1, op2); + movt_d(dst, src); + } else { + c_ult_s(op1, op2); + movt_s(dst, src); + } + break; + + case LE: + if (!is_float) { + c_ule_d(op1, op2); + movt_d(dst, src); + } else { + c_ule_s(op1, op2); + movt_s(dst, src); + } + break; + + default: + Unimplemented(); + } +} + +void MacroAssembler::cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp, + bool is_float) { + Label L; + + switch(cmp) { + case EQ: + bne(op1, op2, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case NE: + beq(op1, op2, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case GT: + slt(AT, op2, op1); + beq(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case GE: + slt(AT, op1, op2); + bne(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + 
+ case LT: + slt(AT, op1, op2); + beq(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + case LE: + slt(AT, op2, op1); + bne(AT, R0, L); + delayed()->nop(); + if (is_float) { + mov_s(dst, src); + } else { + mov_d(dst, src); + } + bind(L); + break; + + default: + Unimplemented(); + } +} diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp new file mode 100644 index 00000000000..ab9727793f4 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.hpp @@ -0,0 +1,701 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP + +#include "asm/assembler.hpp" +#include "utilities/macros.hpp" +#include "runtime/rtmLocking.hpp" + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +class MacroAssembler: public Assembler { + friend class LIR_Assembler; + friend class Runtime1; // as_Address() + + public: + // Compare code + typedef enum { + EQ = 0x01, + NE = 0x02, + GT = 0x03, + GE = 0x04, + LT = 0x05, + LE = 0x06 + } CMCompare; + + protected: + + Address as_Address(AddressLiteral adr); + Address as_Address(ArrayAddress adr); + + // Support for VM calls + // + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). +#ifdef CC_INTERP + // c++ interpreter never wants to use interp_masm version of call_VM + #define VIRTUAL +#else + #define VIRTUAL virtual +#endif + + VIRTUAL void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments // the number of arguments to pop after the call + ); + + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base + // returns the register which contains the thread upon return. 
If a thread register has been + // specified, the return value will correspond to that register. If no last_java_sp is specified + // (noreg) than sp will be used instead. + VIRTUAL void call_VM_base( // returns the register containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call + bool check_exceptions // whether to check for pending exceptions after return + ); + + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); + + // helpers for FPU flag access + // tmp is a temporary register, if none is available use noreg + + public: + static intptr_t i[32]; + static float f[32]; + static void print(outputStream *s); + + static int i_offset(unsigned int k); + static int f_offset(unsigned int k); + + static void save_registers(MacroAssembler *masm); + static void restore_registers(MacroAssembler *masm); + + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + + void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. + void pd_patch_instruction(address branch, address target); + + address emit_trampoline_stub(int insts_call_instruction_offset, address target); + + // Support for inc/dec with optimal instruction selection depending on value + void incrementl(Register reg, int value = 1); + void decrementl(Register reg, int value = 1); + + + // Alignment + void align(int modulus); + + + // Stack frame creation/removal + void enter(); + void leave(); + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
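+  // Illustrative use only (the entry point name below is hypothetical):
+  //   __ call_VM(V0, CAST_FROM_FN_PTR(address, InterpreterRuntime::some_entry), A1);
+  // This sets up last_Java_frame, passes the thread (TREG by default) as the
+  // first C argument, and checks for a pending exception on return unless
+  // check_exceptions is false.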
+ + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, bool + check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result (Register oop_result, Register thread); + void get_vm_result_2(Register metadata_result, Register thread); + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2, Register arg_3); + + // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register thread, + Register last_java_sp, + Register last_java_fp, + address last_java_pc); + + // thread in the default location (S6) + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc); + + void reset_last_Java_frame(Register thread, bool clear_fp); + + // thread in the default location (S6) + void reset_last_Java_frame(bool clear_fp); + + // Stores + void store_check(Register obj); // store check for obj - register is destroyed afterwards + void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) + + void resolve_jobject(Register value, Register thread, Register tmp); + void clear_jweak_tag(Register possibly_jweak); + +#if INCLUDE_ALL_GCS + + void g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + +#endif // INCLUDE_ALL_GCS + + // split store_check(Register obj) to enhance instruction interleaving + void store_check_part_1(Register obj); + void store_check_part_2(Register obj); + + // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 + void c2bool(Register x); + //add for compressedoops + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + void load_prototype_header(Register dst, Register src); + + void store_klass_gap(Register dst, Register src); + + void load_heap_oop(Register dst, Address src); + void store_heap_oop(Address dst, Register src); + void store_heap_oop_null(Address dst); + void encode_heap_oop(Register r); + void encode_heap_oop(Register dst, Register src); + void decode_heap_oop(Register r); + void decode_heap_oop(Register dst, Register src); + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop_not_null(Register dst, Register src); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register dst, Register src); + + // Returns the byte size of the instructions generated by decode_klass_not_null() + // when compressed klass pointers are being used. + static int instr_size_for_decode_klass_not_null(); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + DEBUG_ONLY(void verify_heapbase(const char* msg);) + + void set_narrow_klass(Register dst, Klass* k); + void set_narrow_oop(Register dst, jobject obj); + + + + + // Sign extension + void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } + void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } + void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); + + void trigfunc(char trig, int num_fpu_regs_in_use = 1); + // allocation + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); + void incr_allocated_bytes(Register thread, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. 
+ // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + + + // Debugging + + // only if +VerifyOops + void verify_oop(Register reg, const char* s = "broken oop"); + void verify_oop_addr(Address addr, const char * s = "broken oop addr"); + void verify_oop_subroutine(); + // TODO: verify method and klass metadata (compare against vptr?) + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + + #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) + #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyFPU + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + + // prints msg and continues + void warn(const char* msg); + + static void debug(char* msg/*, RegistersForDebugging* regs*/); + static void debug64(char* msg, int64_t pc, int64_t regs[]); + + void print_reg(Register reg); + void print_reg(FloatRegister reg); + + void untested() { stop("untested"); } + + void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, sizeof(b), "unimplemented: %s", what); stop(b); } + + void should_not_reach_here() { stop("should not reach here"); } + + void print_CPU_state(); + + // Stack overflow checking + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + if (offset <= 32768) { + sw(RA0, SP, -offset); + } else { + li(AT, offset); + dsubu(AT, SP, AT); + sw(RA0, AT, 0); + } + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Also, clobbers tmp + void bang_stack_size(Register size, Register tmp); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp); + + //void verify_tlab(); + void verify_tlab(Register t1, Register t2); + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // tmp_reg is optional. 
If it is supplied (i.e., != noreg) it will + // be killed; if not supplied, push/pop will be used internally to + // allocate a temporary (inefficient, avoid if possible). + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. + int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); +#ifdef COMPILER2 + void fast_lock(Register obj, Register box, Register tmp, Register scr); + void fast_unlock(Register obj, Register box, Register tmp); +#endif + + + // Arithmetics + // Regular vs. d* versions + inline void addu_long(Register rd, Register rs, Register rt) { + daddu(rd, rs, rt); + } + inline void addu_long(Register rd, Register rs, long imm32_64) { + daddiu(rd, rs, imm32_64); + } + + void round_to(Register reg, int modulus) { + assert_different_registers(reg, AT); + increment(reg, modulus - 1); + move(AT, - modulus); + andr(reg, reg, AT); + } + + // the follow two might use AT register, be sure you have no meanful data in AT before you call them + void increment(Register reg, int imm); + void decrement(Register reg, int imm); + + void shl(Register reg, int sa) { dsll(reg, reg, sa); } + void shr(Register reg, int sa) { dsrl(reg, reg, sa); } + void sar(Register reg, int sa) { dsra(reg, reg, sa); } + + // Helper functions for statistics gathering. 
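+  // atomic_inc32(counter_addr, inc, tmp1, tmp2) bumps the 32-bit counter at
+  // counter_addr by inc, using the two temporaries as scratch; it is intended
+  // only for the statistics counters mentioned above.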
+ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); + + // Calls + void call(address entry); + void call(address entry, relocInfo::relocType rtype); + void call(address entry, RelocationHolder& rh); + + address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); + + // Emit the CompiledIC call idiom + void ic_call(address entry); + + // Jumps + void jmp(address entry); + void jmp(address entry, relocInfo::relocType rtype); + void jmp_far(Label& L); // always long jumps + + /* branches may exceed 16-bit offset */ + void b_far(address entry); + void b_far(Label& L); + + void bne_far (Register rs, Register rt, address entry); + void bne_far (Register rs, Register rt, Label& L); + + void beq_far (Register rs, Register rt, address entry); + void beq_far (Register rs, Register rt, Label& L); + + // For C2 to support long branches + void beq_long (Register rs, Register rt, Label& L); + void bne_long (Register rs, Register rt, Label& L); + void bc1t_long (Label& L); + void bc1f_long (Label& L); + + void patchable_call(address target); + void general_call(address target); + + void patchable_jump(address target); + void general_jump(address target); + + static int insts_for_patchable_call(address target); + static int insts_for_general_call(address target); + + static int insts_for_patchable_jump(address target); + static int insts_for_general_jump(address target); + + // Floating + // Data + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs + inline void ld_ptr(Register rt, Address a) { + ld(rt, a); + } + + inline void ld_ptr(Register rt, Register base, int offset16) { + ld(rt, base, offset16); + } + + // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs + inline void st_ptr(Register rt, Address a) { + sd(rt, a); + } + + inline void st_ptr(Register rt, Register base, int offset16) { + sd(rt, base, offset16); + } + + void ld_ptr(Register rt, Register base, Register offset); + void st_ptr(Register rt, Register base, Register offset); + + // swap the two byte of the low 16-bit halfword + // this directive will use AT, be sure the high 16-bit of reg is zero + void hswap(Register reg); + void huswap(Register reg); + + // convert big endian integer to little endian integer + void swap(Register reg); + + // implement the x86 instruction semantic + // if c_reg == *dest then *dest <= x_reg + // else c_reg <= *dest + // the AT indicate if xchg occurred, 1 for xchged, else 0 + void cmpxchg(Register x_reg, Address dest, Register c_reg); + void cmpxchg32(Register x_reg, Address dest, Register c_reg); + void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); + + //pop & push + void extend_sign(Register rh, Register rl) { stop("extend_sign"); } + void neg(Register reg) { dsubu(reg, R0, reg); } + void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } + void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } + void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } + void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } + void pop () { daddiu(SP, SP, 8); } + void pop2 () { daddiu(SP, SP, 16); } + void push2(Register reg1, Register reg2); + void pop2 (Register reg1, Register reg2); + void 
dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } + void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } + //we need 2 fun to save and resotre general register + void pushad(); + void popad(); + void pushad_except_v0(); + void popad_except_v0(); + + //move an 32-bit immediate to Register + void move(Register reg, int imm32) { li32(reg, imm32); } + void li (Register rd, long imm); + void li (Register rd, address addr) { li(rd, (long)addr); } + //replace move(Register reg, int imm) + void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 + void set64(Register d, jlong value); + static int insts_for_set64(jlong value); + + void patchable_set48(Register d, jlong value); + void patchable_set32(Register d, jlong value); + + void patchable_call32(Register d, jlong value); + + static int call_size(address target, bool far, bool patchable); + + static bool reachable_from_cache(address target); + static bool reachable_from_cache(); + + + void dli(Register rd, long imm) { li(rd, imm); } + void li64(Register rd, long imm); + void li48(Register rd, long imm); + + void move(Register rd, Register rs) { daddu(rd, rs, R0); } + void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } + void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } + void mov_metadata(Register dst, Metadata* obj); + void mov_metadata(Address dst, Metadata* obj); + + void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); + void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); + void store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); + void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); + void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); + void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); + int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); + int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); + +#ifndef PRODUCT + static void pd_print_patched_instruction(address branch) { + jint stub_inst = *(jint*) branch; + print_instruction(stub_inst); + ::tty->print("%s", " (unresolved)"); + + } +#endif + + //FIXME + void empty_FPU_stack(){/*need implemented*/}; + + + // method handles (JSR 292) + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + // Conditional move + void cmp_cmov(Register op1, + Register op2, + Register dst, + Register src, + CMCompare cmp = EQ, + bool is_signed = true); + void cmp_cmov(FloatRegister op1, + FloatRegister op2, + Register dst, + Register src, + CMCompare cmp = EQ, + bool is_float = true); + void cmp_cmov(FloatRegister op1, + FloatRegister op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp = EQ, + bool is_float = true); + void cmp_cmov(Register op1, + Register op2, + FloatRegister dst, + FloatRegister src, + CMCompare cmp = EQ, + bool is_float = true); + +#undef VIRTUAL + +}; + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and it's + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. 
+ */ +class SkipIfEqual { + private: + MacroAssembler* _masm; + Label _label; + + public: + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); + ~SkipIfEqual(); +}; + +#ifdef ASSERT +inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } +#endif + + +#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp b/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp new file mode 100644 index 00000000000..92c05fb726a --- /dev/null +++ b/hotspot/src/cpu/mips/vm/macroAssembler_mips.inline.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP diff --git a/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp b/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp new file mode 100644 index 00000000000..0c467df2f38 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/metaspaceShared_mips_64.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "memory/metaspaceShared.hpp" + +// Generate the self-patching vtable method: +// +// This method will be called (as any other Klass virtual method) with +// the Klass itself as the first argument. Example: +// +// oop obj; +// int size = obj->klass()->klass_part()->oop_size(this); +// +// for which the virtual method call is Klass::oop_size(); +// +// The dummy method is called with the Klass object as the first +// operand, and an object as the second argument. +// + +//===================================================================== + +// All of the dummy methods in the vtable are essentially identical, +// differing only by an ordinal constant, and they bear no releationship +// to the original method which the caller intended. Also, there needs +// to be 'vtbl_list_size' instances of the vtable in order to +// differentiate between the 'vtable_list_size' original Klass objects. + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +void MetaspaceShared::generate_vtable_methods(void** vtbl_list, + void** vtable, + char** md_top, + char* md_end, + char** mc_top, + char* mc_end) { + + intptr_t vtable_bytes = (num_virtuals * vtbl_list_size) * sizeof(void*); + *(intptr_t *)(*md_top) = vtable_bytes; + *md_top += sizeof(intptr_t); + void** dummy_vtable = (void**)*md_top; + *vtable = dummy_vtable; + *md_top += vtable_bytes; + + // Get ready to generate dummy methods. + + CodeBuffer cb((unsigned char*)*mc_top, mc_end - *mc_top); + MacroAssembler* masm = new MacroAssembler(&cb); + + Label common_code; + for (int i = 0; i < vtbl_list_size; ++i) { + for (int j = 0; j < num_virtuals; ++j) { + dummy_vtable[num_virtuals * i + j] = (void*)masm->pc(); + + // Load V0 with a value indicating vtable/offset pair. + // -- bits[ 7..0] (8 bits) which virtual method in table? + // -- bits[12..8] (5 bits) which virtual method table? + // -- must fit in 13-bit instruction immediate field. + __ move(V0, (i << 8) + j); + __ b(common_code); + __ delayed()->nop(); + } + } + + __ bind(common_code); + + __ srl(T9, V0, 8); // isolate vtable identifier. + __ shl(T9, LogBytesPerWord); + __ li(AT, (long)vtbl_list); + __ addu(T9, AT, T9); + __ ld(T9, T9, 0); // get correct vtable address. + __ sd(T9, A0, 0); // update vtable pointer. + + __ andi(V0, V0, 0x00ff); // isolate vtable method index + __ shl(V0, LogBytesPerWord); + __ addu(T9, T9, V0); + __ ld(T9, T9, 0); // address of real method pointer. + __ jr(T9); // get real method pointer. + __ delayed()->nop(); + + __ flush(); + + *mc_top = (char*)__ pc(); +} diff --git a/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp b/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp new file mode 100644 index 00000000000..428c2713621 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/methodHandles_mips.cpp @@ -0,0 +1,576 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
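[Editor's sketch, not part of the patch] The V0 encoding used by the dummy vtable stubs above, written as plain standalone C++ so the bit layout is explicit; the 13-bit limit comes from the instruction-immediate constraint noted in the generator comment.

    #include <cassert>
    #include <cstdint>

    // bits[ 7..0] = which virtual method in the table, bits[12..8] = which table.
    static inline uint32_t encode_vtable_slot(int table, int method) {
      assert(table < 32 && method < 256);          // must fit the 13-bit immediate
      return (uint32_t)((table << 8) + method);    // value the stub moves into V0
    }

    static inline void decode_vtable_slot(uint32_t v0, int* table, int* method) {
      *table  = (int)(v0 >> 8);      // mirrors: srl  T9, V0, 8
      *method = (int)(v0 & 0x00ff);  // mirrors: andi V0, V0, 0x00ff
    }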
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) block_comment(error); __ stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + if (VerifyMethodHandles) + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, err_msg("%s should be nonzero", xname)); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //ASSERT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { + Label L; + BLOCK_COMMENT("verify_ref_kind {"); + __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); + __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); + __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); + __ andr(temp, temp, AT); + __ move(AT, ref_kind); + __ beq(temp, AT, L); + __ delayed()->nop(); + { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); + jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); + if (ref_kind == JVM_REF_invokeVirtual || + ref_kind == JVM_REF_invokeSpecial) + // could do this for all ref_kinds, but would explode assembly code size + trace_method_handle(_masm, buf); + __ STOP(buf); + } + BLOCK_COMMENT("} verify_ref_kind"); + __ bind(L); +} + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { + assert(method == Rmethod, "interpreter calling convention"); + + Label L_no_such_method; + __ beq(method, R0, L_no_such_method); + __ delayed()->nop(); + + __ verify_method_ptr(method); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI 
events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + Register rthread = TREG; + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? + __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); + __ beq(AT, R0, run_compiled_code); + __ delayed()->nop(); + __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); + __ jr(T9); + __ delayed()->nop(); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ld(T9, method, in_bytes(entry_offset)); + __ jr(T9); + __ delayed()->nop(); + + __ bind(L_no_such_method); + address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); + __ jmp(wrong_method, relocInfo::runtime_call_type); + __ delayed()->nop(); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. + assert_different_registers(recv, method_temp, temp2); + assert(recv != noreg, "required register"); + assert(method_temp == Rmethod, "required register for loading method"); + + //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes()))); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes()))); + __ verify_oop(method_temp); + // the following assumes that a Method* is normally compressed in the vmtarget field: + __ ld(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()))); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ld(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, + Address(temp2, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + Address recv_addr = __ argument_address(temp2, -1); + __ ld(AT, recv_addr); + __ beq(recv, AT, L); + __ delayed()->nop(); + + recv_addr = __ argument_address(temp2, -1); + __ ld(V0, recv_addr); + __ STOP("receiver not on stack"); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. 
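[Editor's model, not VM source] The pointer chain that jump_to_lambda_form above walks before transferring control, with simplified stand-in types so the three loads are easy to follow.

    // MethodHandle.form -> LambdaForm.vmentry -> MemberName.vmtarget -> Method*
    struct Method       { void* from_interpreted_entry; void* from_compiled_entry; };
    struct MemberName   { Method* vmtarget; };
    struct LambdaForm   { MemberName* vmentry; };
    struct MethodHandle { LambdaForm* form; };

    static void* lambda_form_target(MethodHandle* recv, bool for_compiler_entry) {
      Method* m = recv->form->vmentry->vmtarget;      // the load_heap_oop/ld chain above
      return for_compiler_entry ? m->from_compiled_entry
                                : m->from_interpreted_entry;  // entry_offset choice
    }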
+ __ stop("empty stubs make SG sick"); + return NULL; + } + + // Rmethod: Method* + // T9: argument locator (parameter slot count, added to sp) + // S7: used as temp to hold mh or receiver + Register t9_argp = T9; // argument list ptr, live on error paths + Register s7_mh = S7; // MH receiver; dies quickly and is recycled + Register rm_method = Rmethod; // eventual target of this invocation + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ lbu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); + guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); + __ addiu(AT, AT, -1 * (int) iid); + __ beq(AT, R0, L); + __ delayed()->nop(); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ STOP("bad Method*::intrinsic_id"); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. + Address t9_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ld(t9_argp, Address(rm_method, Method::const_offset())); + __ load_sized_value(t9_argp, + Address(t9_argp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + t9_first_arg_addr = __ argument_address(t9_argp, -1); + } else { + DEBUG_ONLY(t9_argp = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ld(s7_mh, t9_first_arg_addr); + DEBUG_ONLY(t9_argp = noreg); + } + + // t9_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register r_recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. + __ ld(r_recv = T2, t9_first_arg_addr); + } + DEBUG_ONLY(t9_argp = noreg); + Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now + __ pop(rm_member); // extract last argument + generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); + } + + return entry_point; +} + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + Register rm_method = Rmethod; // eventual target of this invocation + // temps used in this code are not used in *either* compiled or interpreted calling sequences + Register j_rarg0 = T0; + Register j_rarg1 = A0; + Register j_rarg2 = A1; + Register j_rarg3 = A2; + Register j_rarg4 = A3; + Register j_rarg5 = A4; + + Register temp1 = T8; + Register temp2 = T9; + Register temp3 = V0; + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? 
noreg : j_rarg0), "only valid assignment"); + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); + } + else { + assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP + } + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + + if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. + if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz); + load_klass_from_Class(_masm, temp2_defc); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); + // If we get here, the type check failed! 
+ __ STOP("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ ld(rm_method, member_vmtarget); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ ld(rm_method, member_vmtarget); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ ld(temp2_index, member_vmindex); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ slt(AT, R0, temp2_index); + __ bne(AT, R0, L_index_ok); + __ delayed()->nop(); + __ STOP("no virtual index"); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. + + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf); + __ verify_klass_ptr(temp3_intf); + + Register rm_index = rm_method; + __ ld(rm_index, member_vmindex); + if (VerifyMethodHandles) { + Label L; + __ slt(AT, rm_index, R0); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ STOP("invalid vtable index for MH.invokeInterface"); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rm_index, rm_method, + temp2, + L_incompatible_class_change_error); + break; + } + + default: + fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + break; + } + + // Live at this point: + // rm_method + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that r_recv be shifted out. 
+ __ verify_method_ptr(rm_method); + jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); + + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); + __ jmp(icce_entry, relocInfo::runtime_call_type); + __ delayed()->nop(); + } + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oop mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { + // called as a leaf from native code: do not block the JVM! + bool has_mh = (strstr(adaptername, "/static") == NULL && + strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH + const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; + tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, + adaptername, mh_reg_name, + p2i(mh), p2i(entry_sp)); + + if (Verbose) { + tty->print_cr("Registers:"); + const int saved_regs_count = RegisterImpl::number_of_registers; + for (int i = 0; i < saved_regs_count; i++) { + Register r = as_Register(i); + // The registers are stored in reverse order on the stack (by pusha). + tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); + if ((i + 1) % 4 == 0) { + tty->cr(); + } else { + tty->print(", "); + } + } + tty->cr(); + + { + // dumping last frame with frame::describe + + JavaThread* p = JavaThread::active(); + + ResourceMark rm; + PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here + FrameValues values; + + // Note: We want to allow trace_method_handle from any call site. + // While trace_method_handle creates a frame, it may be entered + // without a PC on the stack top (e.g. not just after a call). + // Walking that frame could lead to failures due to that invalid PC. + // => carefully detect that frame when doing the stack walking + + // Current C frame + frame cur_frame = os::current_frame(); + + // Robust search of trace_calling_frame (independant of inlining). + // Assumes saved_regs comes from a pusha in the trace_calling_frame. + assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); + frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); + while (trace_calling_frame.fp() < saved_regs) { + trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); + } + + // safely create a frame and call frame::describe + intptr_t *dump_sp = trace_calling_frame.sender_sp(); + intptr_t *dump_fp = trace_calling_frame.link(); + + bool walkable = has_mh; // whether the traced frame shoud be walkable + + if (walkable) { + // The previous definition of walkable may have to be refined + // if new call sites cause the next frame constructor to start + // failing. Alternatively, frame constructors could be + // modified to support the current or future non walkable + // frames (but this is more intrusive and is not considered as + // part of this RFE, which will instead use a simpler output). 
+ frame dump_frame = frame(dump_sp, dump_fp); + dump_frame.describe(values, 1); + } else { + // Stack may not be walkable (invalid PC above FP): + // Add descriptions without building a Java frame to avoid issues + values.describe(-1, dump_fp, "fp for #1 "); + values.describe(-1, dump_sp, "sp for #1"); + } + values.describe(-1, entry_sp, "raw top of stack"); + + tty->print_cr("Stack layout:"); + values.print(p); + } + if (has_mh && mh->is_oop()) { + mh->print(); + if (java_lang_invoke_MethodHandle::is_instance(mh)) { + if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) + java_lang_invoke_MethodHandle::form(mh)->print(); + } + } + } +} + +// The stub wraps the arguments in a struct on the stack to avoid +// dealing with the different calling conventions for passing 6 +// arguments. +struct MethodHandleStubArguments { + const char* adaptername; + oopDesc* mh; + intptr_t* saved_regs; + intptr_t* entry_sp; +}; +void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { + trace_method_handle_stub(args->adaptername, + args->mh, + args->saved_regs, + args->entry_sp); +} + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { +} +#endif //PRODUCT diff --git a/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp b/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp new file mode 100644 index 00000000000..03b65fc8ef2 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/methodHandles_mips.hpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. 
+ +// Adapters +enum /* platform_dependent_constants */ { + adapter_code_size = 32000 DEBUG_ONLY(+ 150000) +}; + +// Additional helper methods for MethodHandles code generation: +public: + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry); + + static Register saved_last_sp_register() { + // Should be in sharedRuntime, not here. + return I29; + } diff --git a/hotspot/src/cpu/mips/vm/mips.ad b/hotspot/src/cpu/mips/vm/mips.ad new file mode 100644 index 00000000000..3563bbe0e59 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/mips.ad @@ -0,0 +1,25 @@ +// +// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + diff --git a/hotspot/src/cpu/mips/vm/mips_64.ad b/hotspot/src/cpu/mips/vm/mips_64.ad new file mode 100644 index 00000000000..29125913a4f --- /dev/null +++ b/hotspot/src/cpu/mips/vm/mips_64.ad @@ -0,0 +1,14036 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// GodSon3 Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// archtecture. + +// format: +// reg_def name (call convention, c-call convention, ideal type, encoding); +// call convention : +// NS = No-Save +// SOC = Save-On-Call +// SOE = Save-On-Entry +// AS = Always-Save +// ideal type : +// see opto/opcodes.hpp for more info +// reg_class name (reg, ...); +// alloc_class name (reg, ...); +register %{ + +// General Registers +// Integer Registers + reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); + reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); + reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); + reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); + reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); + reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); + reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); + reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); + reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); + reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); + reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); + reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); + reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); + reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); + reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); + reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); + reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); + reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); + reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); + reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); + reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); + reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); + reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); + reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); + reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); + reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); + reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); + reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); + reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); + reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); + reg_def T3_H (SOC, SOC, Op_RegI, 15, T3->as_VMReg()->next()); + reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); + reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); + reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); + reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); + reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); + reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); + reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); + reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); + reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); + reg_def 
S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); + reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); + reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); + reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); + reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); + reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); + reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); + reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); + reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); + reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); + reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); + +// Special Registers + reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); + reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); + reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); + reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); + reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); + reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); + reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); + reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); + reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); + reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); + +// Floating registers. +reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); +reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); +reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); +reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); +reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); +reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); +reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); +reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); +reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); +reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); +reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); +reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); +reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); +reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); +reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); +reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); +reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); +reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); +reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); +reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); +reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); +reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); +reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); +reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); +reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); +reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); +reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); +reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); +reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); +reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); +reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); +reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); +reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); +reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); +reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); +reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); +reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); +reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); +reg_def F19 ( SOC, SOC, 
Op_RegF, 19, F19->as_VMReg()); +reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); +reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); +reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); +reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); +reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); +reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); +reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); +reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); +reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); +reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); +reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); +reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); +reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); +reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); +reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); +reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); +reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); +reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); +reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); +reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()); +reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next()); +reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()); +reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next()); +reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()); +reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next()); + + +// ---------------------------- +// Special Registers +//S6 is used for get_thread(S6) +//S5 is uesd for heapbase of compressed oop +alloc_class chunk0( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S5, S5_H, + S6, S6_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T8, T8_H, + T9, T9_H, + T1, T1_H, // inline_cache_reg + V1, V1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + V0, V0_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H, + GP, GP_H + RA, RA_H, + SP, SP_H, // stack_pointer + FP, FP_H // frame_pointer + ); + +alloc_class chunk1( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F23, F23_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F19, F19_H, + F18, F18_H, + F17, F17_H, + F16, F16_H, + F15, F15_H, + F14, F14_H, + F13, F13_H, + F12, F12_H, + F29, F29_H, + F30, F30_H, + F31, F31_H); + +reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); +reg_class s0_reg( S0 ); +reg_class s1_reg( S1 ); +reg_class s2_reg( S2 ); +reg_class s3_reg( S3 ); +reg_class s4_reg( S4 ); +reg_class s5_reg( S5 ); +reg_class s6_reg( S6 ); +reg_class s7_reg( S7 ); + +reg_class t_reg( T0, T1, T2, T3, T8, T9 ); +reg_class t0_reg( T0 ); +reg_class t1_reg( T1 ); +reg_class t2_reg( T2 ); +reg_class t3_reg( T3 ); +reg_class t8_reg( T8 ); +reg_class t9_reg( T9 ); + +reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 ); +reg_class a0_reg( A0 ); +reg_class a1_reg( A1 ); +reg_class a2_reg( A2 ); +reg_class a3_reg( A3 ); +reg_class a4_reg( A4 ); +reg_class a5_reg( A5 ); +reg_class a6_reg( A6 ); +reg_class a7_reg( A7 ); + +reg_class v0_reg( V0 ); +reg_class v1_reg( V1 ); + +reg_class sp_reg( SP, SP_H ); +reg_class fp_reg( FP, FP_H ); + +reg_class v0_long_reg( V0, V0_H ); +reg_class v1_long_reg( V1, V1_H ); +reg_class a0_long_reg( A0, A0_H ); +reg_class a1_long_reg( A1, A1_H ); +reg_class a2_long_reg( 
A2, A2_H ); +reg_class a3_long_reg( A3, A3_H ); +reg_class a4_long_reg( A4, A4_H ); +reg_class a5_long_reg( A5, A5_H ); +reg_class a6_long_reg( A6, A6_H ); +reg_class a7_long_reg( A7, A7_H ); +reg_class t0_long_reg( T0, T0_H ); +reg_class t1_long_reg( T1, T1_H ); +reg_class t2_long_reg( T2, T2_H ); +reg_class t3_long_reg( T3, T3_H ); +reg_class t8_long_reg( T8, T8_H ); +reg_class t9_long_reg( T9, T9_H ); +reg_class s0_long_reg( S0, S0_H ); +reg_class s1_long_reg( S1, S1_H ); +reg_class s2_long_reg( S2, S2_H ); +reg_class s3_long_reg( S3, S3_H ); +reg_class s4_long_reg( S4, S4_H ); +reg_class s5_long_reg( S5, S5_H ); +reg_class s6_long_reg( S6, S6_H ); +reg_class s7_long_reg( S7, S7_H ); + +reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 ); + +reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 ); + +reg_class p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T8, T8_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class no_T8_p_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + +reg_class long_reg( + S7, S7_H, + S0, S0_H, + S1, S1_H, + S2, S2_H, + S4, S4_H, + S3, S3_H, + T8, T8_H, + T2, T2_H, + T3, T3_H, + T1, T1_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, + A4, A4_H, + A3, A3_H, + A2, A2_H, + A1, A1_H, + A0, A0_H, + T0, T0_H + ); + + +// Floating point registers. +// F31 are not used as temporary registers in D2I +reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31); +reg_class dbl_reg( F0, F0_H, + F1, F1_H, + F2, F2_H, + F3, F3_H, + F4, F4_H, + F5, F5_H, + F6, F6_H, + F7, F7_H, + F8, F8_H, + F9, F9_H, + F10, F10_H, + F11, F11_H, + F12, F12_H, + F13, F13_H, + F14, F14_H, + F15, F15_H, + F16, F16_H, + F17, F17_H, + F18, F18_H, + F19, F19_H, + F20, F20_H, + F21, F21_H, + F22, F22_H, + F23, F23_H, + F24, F24_H, + F25, F25_H, + F26, F26_H, + F27, F27_H, + F28, F28_H, + F29, F29_H, + F31, F31_H); + +reg_class flt_arg0( F12 ); +reg_class dbl_arg0( F12, F12_H ); +reg_class dbl_arg1( F14, F14_H ); + +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define name --> value mappings to inform the ADLC of an integer valued name +// Current support includes integer values in the range [0, 0x7FFFFFFF] +// Format: +// int_def ( , ); +// Generated Code in ad_.hpp +// #define () +// // value == +// Generated code in ad_.cpp adlc_verification() +// assert( == , "Expect () to equal "); +// +definitions %{ + int_def DEFAULT_COST ( 100, 100); + int_def HUGE_COST (1000000, 1000000); + + // Memory refs are twice as expensive as run-of-the-mill. + int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); + + // Branches are even more expensive. 
+ int_def BRANCH_COST ( 300, DEFAULT_COST * 3); + // we use jr instruction to construct call, so more expensive + int_def CALL_COST ( 500, DEFAULT_COST * 5); +/* + int_def EQUAL ( 1, 1 ); + int_def NOT_EQUAL ( 2, 2 ); + int_def GREATER ( 3, 3 ); + int_def GREATER_EQUAL ( 4, 4 ); + int_def LESS ( 5, 5 ); + int_def LESS_EQUAL ( 6, 6 ); +*/ +%} + + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, functions, and +// definitions necessary in the rest of the architecture description + +source_hpp %{ +// Header information of the source block. +// Method declarations/definitions which are used outside +// the ad-scope can conveniently be defined here. +// +// To keep related declarations/definitions/uses close together, +// we switch between source %{ }% and source_hpp %{ }% freely as needed. + +class CallStubImpl { + + //-------------------------------------------------------------- + //---< Used for optimization in Compile::shorten_branches >--- + //-------------------------------------------------------------- + + public: + // Size of call trampoline stub. + static uint size_call_trampoline() { + return 0; // no call trampolines on this platform + } + + // number of relocations needed by a call trampoline stub + static uint reloc_call_trampoline() { + return 0; // no call trampolines on this platform + } +}; + +class HandlerImpl { + + public: + + static int emit_exception_handler(CodeBuffer &cbuf); + static int emit_deopt_handler(CodeBuffer& cbuf); + + static uint size_exception_handler() { + // NativeCall instruction size is the same as NativeJump. + // exception handler starts out as jump and can be patched to + // a call be deoptimization. (4932387) + // Note that this value is also credited (in output.cpp) to + // the size of the code section. + int size = NativeCall::instruction_size; + return round_to(size, 16); + } + + static uint size_deopt_handler() { + int size = NativeCall::instruction_size; + return round_to(size, 16); + } +}; + +%} // end source_hpp + +source %{ + +#define NO_INDEX 0 +#define RELOC_IMM64 Assembler::imm_operand +#define RELOC_DISP32 Assembler::disp32_operand + + +#define __ _masm. + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + + +// Emit exception handler code. +// Stuff framesize into a register and call a VM stub routine. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_exception_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); + __ align(16); + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + +// Emit deopt handler code. +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { + // Note that the code buffer's insts_mark is always relative to insts. 
+ // That's why we must use the macroassembler to generate a handler. + MacroAssembler _masm(&cbuf); + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + __ block_comment("; emit_deopt_handler"); + + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call(SharedRuntime::deopt_blob()->unpack()); + __ align(16); + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); + __ end_a_stub(); + return offset; +} + + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + switch (opcode) { + //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + if (!UseCountLeadingZerosInstructionMIPS64) + return false; + break; + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + if (!UseCountTrailingZerosInstructionMIPS64) + return false; + break; + } + + return true; // Per default match rules are supported. +} + +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + int offs = offset - br_size + 4; + // To be conservative on MIPS + // branch node should be end with: + // branch inst + // delay slot + const int safety_zone = 3 * BytesPerInstWord; + return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); +} + + +// No additional cost for CMOVL. +const int Matcher::long_cmove_cost() { return 0; } + +// No CMOVF/CMOVD with SSE2 +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } + +// Does the CPU require late expand (see block.cpp for description of late expand)? +const bool Matcher::require_postalloc_expand = false; + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? True for Intel but false for most RISCs +const bool Matcher::clone_shift_expressions = false; + +// Do we need to mask the count passed to shift instructions or does +// the cpu only look at the lower 5/6 bits anyway? +const bool Matcher::need_masked_shift_count = false; + +bool Matcher::narrow_oop_use_complex_address() { + assert(UseCompressedOops, "only for compressed oops code"); + return false; +} + +bool Matcher::narrow_klass_use_complex_address() { + assert(UseCompressedClassPointers, "only for compressed klass code"); + return false; +} + +// This is UltraSparc specific, true just means we have fast l2f conversion +const bool Matcher::convL2FSupported(void) { + return true; +} + +// Max vector size in bytes. 0 if not supported. +const int Matcher::vector_width_in_bytes(BasicType bt) { + if (MaxVectorSize == 0) + return 0; + assert(MaxVectorSize == 8, ""); + return 8; +} + +// Vector ideal reg +const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize == 8, ""); + switch(size) { + case 8: return Op_VecD; + } + ShouldNotReachHere(); + return 0; +} + +// Only lowest bits of xmm reg are used for vector shift count. +const uint Matcher::vector_shift_count_ideal_reg(int size) { + fatal("vector shift is not supported"); + return Node::NotAMachineReg; +} + +// Limits on vector size (number of elements) loaded into vector. 
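[Editor's restatement, not part of the patch] The is_short_branch_offset test above as a standalone predicate, to make the reach explicit: a signed 16-bit word offset gives roughly +/-128 KB, reduced by a 3-instruction safety zone for the branch plus its delay slot.

    #include <stdint.h>

    static int fits_simm16(int64_t x) { return x >= -32768 && x <= 32767; }

    static int short_branch_reachable(int offset, int br_size) {
      const int BytesPerInstWord = 4;
      int offs = offset - br_size + 4;
      const int safety_zone = 3 * BytesPerInstWord;            // branch + delay slot margin
      return fits_simm16(((offs < 0) ? offs - safety_zone
                                     : offs + safety_zone) >> 2);  // byte -> word offset
    }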
+const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + return max_vector_size(bt); // Same as max. +} + +// MIPS supports misaligned vectors store/load? FIXME +const bool Matcher::misaligned_vectors_ok() { + return false; + //return !AlignVector; // can be changed by flag +} + +// Register for DIVI projection of divmodI +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +int Matcher::regnum_to_fpu_offset(int regnum) { + return regnum - 32; // The FP registers are in the second chunk +} + + +const bool Matcher::isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + return true; +} + + +// Return whether or not this register is ever used as an argument. This +// function is used on startup to build the trampoline stubs in generateOptoStub. +// Registers not mentioned will be killed by the VM call in the trampoline, and +// arguments in those registers not be available to the callee. +bool Matcher::can_be_java_arg( int reg ) { + // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() + if ( reg == T0_num || reg == T0_H_num + || reg == A0_num || reg == A0_H_num + || reg == A1_num || reg == A1_H_num + || reg == A2_num || reg == A2_H_num + || reg == A3_num || reg == A3_H_num + || reg == A4_num || reg == A4_H_num + || reg == A5_num || reg == A5_H_num + || reg == A6_num || reg == A6_H_num + || reg == A7_num || reg == A7_H_num ) + return true; + + if ( reg == F12_num || reg == F12_H_num + || reg == F13_num || reg == F13_H_num + || reg == F14_num || reg == F14_H_num + || reg == F15_num || reg == F15_H_num + || reg == F16_num || reg == F16_H_num + || reg == F17_num || reg == F17_H_num + || reg == F18_num || reg == F18_H_num + || reg == F19_num || reg == F19_H_num ) + return true; + + return false; +} + +bool Matcher::is_spillable_arg( int reg ) { + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { + return false; +} + +// Register for MODL projection of divmodL +RegMask Matcher::modL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return FP_REG_mask(); +} + +// MIPS doesn't support AES intrinsics +const bool Matcher::pass_original_key_for_aes() { + return false; +} + +int CallStaticJavaDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallLeafDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +int CallRuntimeDirectNode::compute_padding(int current_offset) const { + return round_to(current_offset, alignment_required()) - current_offset; +} + +// If CPU can load and 
store mis-aligned doubles directly then no fixup is +// needed. Else we split the double into 2 integer pieces and move it +// piece-by-piece. Only happens when passing doubles into C code as the +// Java calling convention forces doubles to be aligned. +const bool Matcher::misaligned_doubles_ok = false; +// Do floats take an entire double register or just half? +//const bool Matcher::float_in_double = true; +bool Matcher::float_in_double() { return false; } +// Threshold size for cleararray. +const int Matcher::init_array_short_size = 8 * BytesPerLong; +// Do ints take an entire long register or just half? +const bool Matcher::int_in_long = true; +// Is it better to copy float constants, or load them directly from memory? +// Intel can load a float constant from a direct address, requiring no +// extra registers. Most RISCs will have to materialize an address into a +// register first, so they would do better to copy the constant from stack. +const bool Matcher::rematerialize_float_constants = false; +// Advertise here if the CPU requires explicit rounding operations +// to implement the UseStrictFP mode. +const bool Matcher::strict_fp_requires_explicit_rounding = false; +// false => size gets scaled to BytesPerLong, ok. +const bool Matcher::init_array_count_is_in_bytes = false; + +// Indicate if the safepoint node needs the polling page as an input. +// Since MIPS doesn't have absolute addressing, it needs. +bool SafePointNode::needs_polling_address_input() { + return false; +} + +// !!!!! Special hack to get all type of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. +int MachCallStaticJavaNode::ret_addr_offset() { + //lui + //ori + //nop + //nop + //jalr + //nop + return 24; +} + +int MachCallDynamicJavaNode::ret_addr_offset() { + //lui IC_Klass, + //ori IC_Klass, + //dsll IC_Klass + //ori IC_Klass + + //lui T9 + //ori T9 + //nop + //nop + //jalr T9 + //nop + return 4 * 4 + 4 * 6; +} + +//============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float, rc_stack +enum RC { rc_bad, rc_int, rc_float, rc_stack }; +static enum RC rc_class( OptoReg::Name reg ) { + if( !OptoReg::is_valid(reg) ) return rc_bad; + if (OptoReg::is_stack(reg)) return rc_stack; + VMReg r = OptoReg::as_VMReg(reg); + if (r->is_Register()) return rc_int; + assert(r->is_FloatRegister(), "must be"); + return rc_float; +} + +uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { + // Get registers to move + OptoReg::Name src_second = ra_->get_reg_second(in(1)); + OptoReg::Name src_first = ra_->get_reg_first(in(1)); + OptoReg::Name dst_second = ra_->get_reg_second(this ); + OptoReg::Name dst_first = ra_->get_reg_first(this ); + + enum RC src_second_rc = rc_class(src_second); + enum RC src_first_rc = rc_class(src_first); + enum RC dst_second_rc = rc_class(dst_second); + enum RC dst_first_rc = rc_class(dst_first); + + assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); + + // Generate spill code! 
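[Editor's cross-check, illustrative only] The two ret_addr_offset values above follow from the instruction counts listed in their comments: the static call sequence is six 4-byte instructions, and the dynamic call prepends four more to materialize IC_Klass.

    static const int kInsnBytes = 4;
    // lui, ori, nop, nop, jalr, nop                       -> 6 * 4 = 24
    static_assert(6 * kInsnBytes == 24, "MachCallStaticJavaNode::ret_addr_offset");
    // lui, ori, dsll, ori (IC_Klass) + the 6 above        -> 4*4 + 6*4 = 40
    static_assert((4 + 6) * kInsnBytes == 4 * 4 + 4 * 6, "MachCallDynamicJavaNode::ret_addr_offset");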
+ int size = 0; + + if( src_first == dst_first && src_second == dst_second ) + return 0; // Self copy, no move + + if (src_first_rc == rc_stack) { + // mem -> + if (dst_first_rc == rc_stack) { + // mem -> mem + assert(src_second != dst_first, "overlap"); + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld(AT, Address(SP, src_offset)); + __ sd(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" + "sd AT, [SP + #%d]", + src_offset, dst_offset); + } +#endif + } + size += 8; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + // No pushl/popl, so: + int src_offset = ra_->reg2offset(src_first); + int dst_offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ lw(AT, Address(SP, src_offset)); + __ sw(AT, Address(SP, dst_offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("lw AT, [SP + #%d] spill 2\n\t" + "sw AT, [SP + #%d]\n\t", + src_offset, dst_offset); + } +#endif + } + size += 8; + } + return size; + } else if (dst_first_rc == rc_int) { + // mem -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("ld %s, [SP + #%d]\t# spill 3", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); + else + __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + if (this->ideal_reg() == Op_RegI) + st->print("lw %s, [SP + #%d]\t# spill 4", + Matcher::regName[dst_first], + offset); + else + st->print("lwu %s, [SP + #%d]\t# spill 5", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_float) { + // mem-> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("ldc1 %s, [SP + #%d]\t# spill 6", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no 
transform"); + int offset = ra_->reg2offset(src_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ lwc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("lwc1 %s, [SP + #%d]\t# spill 7", + Matcher::regName[dst_first], + offset); + } +#endif + } + size += 4; + } + return size; + } + } else if (src_first_rc == rc_int) { + // gpr -> + if (dst_first_rc == rc_stack) { + // gpr -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("sd %s, [SP + #%d] # spill 8", + Matcher::regName[src_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("sw %s, [SP + #%d]\t# spill 9", + Matcher::regName[src_first], offset); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_int) { + // gpr -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ move(as_Register(Matcher::_regEncode[dst_first]), + as_Register(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("move(64bit) %s <-- %s\t# spill 10", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + return size; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + if (this->ideal_reg() == Op_RegI) + __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); + else + __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); +#ifndef PRODUCT + } else { + if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("move(32-bit) %s <-- %s\t# spill 11", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + return size; + } + } else if (dst_first_rc == rc_float) { + // gpr -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("dmtc1 %s, %s\t# spill 12", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 
== dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("mtc1 %s, %s\t# spill 13", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } + return size; + } + } else if (src_first_rc == rc_float) { + // xmm -> + if (dst_first_rc == rc_stack) { + // xmm -> mem + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("sdc1 %s, [SP + #%d]\t# spill 14", + Matcher::regName[src_first], + offset); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + int offset = ra_->reg2offset(dst_first); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("swc1 %s, [SP + #%d]\t# spill 15", + Matcher::regName[src_first], + offset); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_int) { + // xmm -> gpr + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("dmfc1 %s, %s\t# spill 16", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("mfc1 %s, %s\t# spill 17", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } + return size; + } else if (dst_first_rc == rc_float) { + // xmm -> xmm + if ((src_first & 1) == 0 && src_first + 1 == src_second && + (dst_first & 1) == 0 && dst_first + 1 == dst_second) { + // 64-bit + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("mov_d %s <-- %s\t# spill 18", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } else { + // 32-bit + assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); + assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); + if (cbuf) { + MacroAssembler _masm(cbuf); + __ mov_s( 
as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else { + if(!do_size){ + if (size != 0) st->print("\n\t"); + st->print("mov_s %s <-- %s\t# spill 19", + Matcher::regName[dst_first], + Matcher::regName[src_first]); + } +#endif + } + size += 4; + } + return size; + } + } + + assert(0," foo "); + Unimplemented(); + return size; + +} + +#ifndef PRODUCT +void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + implementation( NULL, ra_, false, st ); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation( &cbuf, ra_, false, NULL ); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +//============================================================================= +# + +#ifndef PRODUCT +void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("BRK"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { + MacroAssembler _masm(&cbuf); + __ brk(5); +} + +uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { + return MachNode::size(ra_); +} + + +//============================================================================= +#ifndef PRODUCT +void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile *C = ra_->C; + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); + st->print("\t"); + if (UseLEXT1) { + st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); + } else { + st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); + st->print("\t"); + st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); + } + + if( do_polling() && C->is_method_compilation() ) { + st->print("\t"); + st->print_cr("Poll Safepoint # MachEpilogNode"); + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile *C = ra_->C; + MacroAssembler _masm(&cbuf); + int framesize = C->frame_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + if (UseLEXT1) { + __ gslq(RA, FP, SP, framesize - wordSize * 2); + } else { + __ ld(RA, SP, framesize - wordSize ); + __ ld(FP, SP, framesize - wordSize * 2); + } + __ daddiu(SP, SP, framesize); + + if( do_polling() && C->is_method_compilation() ) { + __ set64(AT, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_return_type); + __ lw(AT, AT, 0); + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug +} + +int MachEpilogNode::reloc() const { + return 0; // a large enough number +} + +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +int MachEpilogNode::safepoint_offset() const { return 0; } + +//============================================================================= + +#ifndef PRODUCT +void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); +} +#endif + + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + return 4; +} + +void 
BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + + __ addiu(as_Register(reg), SP, offset); +} + + +//static int sizeof_FFree_Float_Stack_All = -1; + +int MachCallRuntimeNode::ret_addr_offset() { + //lui + //ori + //dsll + //ori + //jalr + //nop + assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); + return NativeCall::instruction_size; +} + + +//============================================================================= +#ifndef PRODUCT +void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { + st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { + MacroAssembler _masm(&cbuf); + int i = 0; + for(i = 0; i < _count; i++) + __ nop(); +} + +uint MachNopNode::size(PhaseRegAlloc *) const { + return 4 * _count; +} +const Pipeline* MachNopNode::pipeline() const { + return MachNode::pipeline_class(); +} + +//============================================================================= + +//============================================================================= +#ifndef PRODUCT +void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + st->print_cr("load_klass(T9, T0)"); + st->print_cr("\tbeq(T9, iCache, L)"); + st->print_cr("\tnop"); + st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); + st->print_cr("\tnop"); + st->print_cr("\tnop"); + st->print_cr(" L:"); +} +#endif + + +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int ic_reg = Matcher::inline_cache_reg_encode(); + Label L; + Register receiver = T0; + Register iCache = as_Register(ic_reg); + + __ load_klass(T9, receiver); + __ beq(T9, iCache, L); + __ delayed()->nop(); + __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ bind(L); +} + +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + + + +//============================================================================= + +const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); + +int Compile::ConstantTable::calculate_table_base_offset() const { + return 0; // absolute addressing, no offset +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + Compile* C = ra_->C; + Compile::ConstantTable& constant_table = C->constant_table(); + MacroAssembler _masm(&cbuf); + + Register Rtoc = as_Register(ra_->get_encode(this)); + CodeSection* consts_section = __ code()->consts(); + int consts_size = consts_section->align_at_start(consts_section->size()); + assert(constant_table.size() == consts_size, "must be equal"); + + if (consts_section->size()) { + // Materialize the constant table base. 
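+      // A rough sketch of what patchable_set48 is expected to emit here (the
+      // exact expansion is owned by MacroAssembler::patchable_set48 and is
+      // shown only for illustration):
+      //   lui   Rtoc, baseaddr[47:32]
+      //   ori   Rtoc, Rtoc, baseaddr[31:16]
+      //   dsll  Rtoc, Rtoc, 16
+      //   ori   Rtoc, Rtoc, baseaddr[15:0]
+      // The sequence has a fixed length of four instructions so the
+      // internal_word relocation recorded above can patch it in place later,
+      // which is also why size() below reports 4 * 4 bytes.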
+ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); + // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); + __ relocate(relocInfo::internal_word_type); + __ patchable_set48(Rtoc, (long)baseaddr); + } +} + +uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { + // patchable_set48 (4 insts) + return 4 * 4; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + Register r = as_Register(ra_->get_encode(this)); + st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); +} +#endif + + +//============================================================================= +#ifndef PRODUCT +void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { + Compile* C = ra_->C; + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(bangsize)) { + st->print_cr("# stack bang"); st->print("\t"); + } + if (UseLEXT1) { + st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); + } else { + st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); + st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); + } + st->print("daddiu FP, SP, -%d \n\t", wordSize*2); + st->print("daddiu SP, SP, -%d \t",framesize); +} +#endif + + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + int framesize = C->frame_size_in_bytes(); + int bangsize = C->bang_size_in_bytes(); + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + + if (C->need_stack_bang(bangsize)) { + __ generate_stack_overflow_check(bangsize); + } + + __ daddiu(SP, SP, -framesize); + if (UseLEXT1) { + __ gssq(RA, FP, SP, framesize - wordSize * 2); + } else { + __ sd(RA, SP, framesize - wordSize); + __ sd(FP, SP, framesize - wordSize * 2); + } + __ daddiu(FP, SP, framesize - wordSize * 2); + + C->set_frame_complete(cbuf.insts_size()); + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. + Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + + +uint MachPrologNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachPrologNode::reloc() const { + return 0; // a large enough number +} + +%} + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to output +// byte streams. Encoding classes generate functions which are called by +// Machine Instruction Nodes in order to generate the bit encoding of the +// instruction. Operands specify their base encoding interface with the +// interface keyword. 
There are currently supported four interfaces, +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an +// operand to generate a function which returns its register number when +// queried. CONST_INTER causes an operand to generate a function which +// returns the value of the constant when queried. MEMORY_INTER causes an +// operand to generate four functions which return the Base Register, the +// Index Register, the Scale Value, and the Offset Value of the operand when +// queried. COND_INTER causes an operand to generate six functions which +// return the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional instruction. +// Instructions specify two basic values for encoding. They use the +// ins_encode keyword to specify their encoding class (which must be one of +// the class names specified in the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular instruction +// needs for encoding need to be specified. +encode %{ + + //Load byte signed + enc_class load_B_enc (mRegI dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if( Assembler::is_simm16(disp) ) { + if (UseLEXT1) { + if (scale == 0) { + __ gslbx(as_Register(dst), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gslbx(as_Register(dst), as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ lb(as_Register(dst), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gslbx(as_Register(dst), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ lb(as_Register(dst), AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lb(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gslbx(as_Register(dst), as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ lb(as_Register(dst), AT, 0); + } + } + } + %} + + //Load byte unsigned + enc_class load_UB_enc (mRegI dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ lbu(as_Register(dst), AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ lbu(as_Register(dst), AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lbu(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ lbu(as_Register(dst), AT, 0); + } + } + %} + + enc_class store_B_reg_enc (memory mem, mRegI src) %{ + MacroAssembler _masm(&cbuf); + int src = $src$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = 
$mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + if( Assembler::is_simm(disp, 8) ) { + if (UseLEXT1) { + __ gssbx(as_Register(src), as_Register(base), as_Register(index), disp); + } else { + __ addu(AT, as_Register(base), as_Register(index)); + __ sb(as_Register(src), AT, disp); + } + } else if( Assembler::is_simm16(disp) ) { + __ addu(AT, as_Register(base), as_Register(index)); + __ sb(as_Register(src), AT, disp); + } else { + __ addu(AT, as_Register(base), as_Register(index)); + __ move(T9, disp); + if (UseLEXT1) { + __ gssbx(as_Register(src), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sb(as_Register(src), AT, 0); + } + } + } else { + __ dsll(AT, as_Register(index), scale); + if( Assembler::is_simm(disp, 8) ) { + if (UseLEXT1) { + __ gssbx(as_Register(src), AT, as_Register(base), disp); + } else { + __ addu(AT, as_Register(base), AT); + __ sb(as_Register(src), AT, disp); + } + } else if( Assembler::is_simm16(disp) ) { + __ addu(AT, as_Register(base), AT); + __ sb(as_Register(src), AT, disp); + } else { + __ addu(AT, as_Register(base), AT); + __ move(T9, disp); + if (UseLEXT1) { + __ gssbx(as_Register(src), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sb(as_Register(src), AT, 0); + } + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sb(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gssbx(as_Register(src), as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ sb(as_Register(src), AT, 0); + } + } + } + %} + + enc_class store_B_immI_enc (memory mem, immI8 src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + int value = $src$$constant; + + if( index != 0 ) { + if (!UseLEXT1) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sb(R0, AT, 0); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ move(T9, value); + __ sb(T9, AT, 0); + } + } + } else { + + if (scale == 0) { + if( Assembler::is_simm(disp, 8) ) { + if (value == 0) { + __ gssbx(R0, as_Register(base), as_Register(index), disp); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), as_Register(index), disp); + } + } else if( Assembler::is_simm16(disp) ) { + __ daddu(AT, as_Register(base), as_Register(index)); + if (value == 0) { + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } else { + if (value == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + __ move(T9, disp); + __ gssbx(R0, AT, T9, 0); + } else { + __ move(AT, disp); + __ move(T9, value); + __ daddu(AT, as_Register(base), AT); + __ gssbx(T9, AT, as_Register(index), 0); + } + } + + } else { + + if( Assembler::is_simm(disp, 8) ) { + __ dsll(AT, as_Register(index), scale); + if (value == 0) { + __ gssbx(R0, as_Register(base), AT, disp); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, disp); + } + } else if( Assembler::is_simm16(disp) ) { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + if (value == 0) { + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } else { + __ dsll(AT, 
as_Register(index), scale); + if (value == 0) { + __ daddu(AT, as_Register(base), AT); + __ move(T9, disp); + __ gssbx(R0, AT, T9, 0); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, 0); + } + } + } + } + } else { + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sb(R0, as_Register(base), disp); + } else { + __ move(AT, value); + __ sb(AT, as_Register(base), disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + if (UseLEXT1) { + __ gssbx(R0, as_Register(base), T9, 0); + } else { + __ daddu(AT, as_Register(base), T9); + __ sb(R0, AT, 0); + } + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ move(AT, value); + __ gssbx(AT, as_Register(base), T9, 0); + } else { + __ daddu(AT, as_Register(base), T9); + __ move(T9, value); + __ sb(T9, AT, 0); + } + } + } + } + %} + + + enc_class store_B_immI_enc_sync (memory mem, immI8 src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + int value = $src$$constant; + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp,8) ) { + if ( scale == 0 ) { + if ( value == 0 ) { + __ gssbx(R0, as_Register(base), as_Register(index), disp); + } else { + __ move(AT, value); + __ gssbx(AT, as_Register(base), as_Register(index), disp); + } + } else { + __ dsll(AT, as_Register(index), scale); + if ( value == 0 ) { + __ gssbx(R0, as_Register(base), AT, disp); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, disp); + } + } + } else if ( Assembler::is_simm16(disp) ) { + if ( scale == 0 ) { + __ daddu(AT, as_Register(base), as_Register(index)); + if ( value == 0 ){ + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + if ( value == 0 ) { + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } + } else { + if ( scale == 0 ) { + __ move(AT, disp); + __ daddu(AT, as_Register(index), AT); + if ( value == 0 ) { + __ gssbx(R0, as_Register(base), AT, 0); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, 0); + } + } else { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + if ( value == 0 ) { + __ gssbx(R0, as_Register(base), AT, 0); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, 0); + } + } + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sb(R0, AT, disp); + } else { + __ move(T9, value); + __ sb(T9, AT, disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sb(R0, AT, 0); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ move(T9, value); + __ sb(T9, AT, 0); + } + } + } + } else { + if (UseLEXT1){ + if ( Assembler::is_simm16(disp) ){ + if ( value == 0 ) { + __ sb(R0, as_Register(base), disp); + } else { + __ move(AT, value); + __ sb(AT, as_Register(base), disp); + } + } else { + __ move(AT, disp); + if ( value == 0 ) { + __ gssbx(R0, as_Register(base), AT, 0); + } else { + __ move(T9, value); + __ gssbx(T9, as_Register(base), AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sb(R0, as_Register(base), disp); + } else { + __ 
move(AT, value); + __ sb(AT, as_Register(base), disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sb(R0, AT, 0); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ move(T9, value); + __ sb(T9, AT, 0); + } + } + } + } + + __ sync(); + %} + + // Load Short (16bit signed) + enc_class load_S_enc (mRegI dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gslhx(as_Register(dst), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gslhx(as_Register(dst), as_Register(base), AT, disp); + } + } else if ( Assembler::is_simm16(disp) ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + __ lh(as_Register(dst), AT, disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + __ lh(as_Register(dst), AT, disp); + } + } else { + if (scale == 0) { + __ move(AT, disp); + __ daddu(AT, as_Register(index), AT); + __ gslhx(as_Register(dst), as_Register(base), AT, 0); + } else { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ gslhx(as_Register(dst), as_Register(base), AT, 0); + } + } + } else { // not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ lh(as_Register(dst), AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ lh(as_Register(dst), AT, 0); + } + } + } else { // index is 0 + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + __ lh(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + __ gslhx(as_Register(dst), as_Register(base), T9, 0); + } + } else { //not use loongson isa + if( Assembler::is_simm16(disp) ) { + __ lh(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ lh(as_Register(dst), AT, 0); + } + } + } + %} + + // Load Char (16bit unsigned) + enc_class load_C_enc (mRegI dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ lhu(as_Register(dst), AT, disp); + } else { + __ move(T9, disp); + __ addu(AT, AT, T9); + __ lhu(as_Register(dst), AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lhu(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ lhu(as_Register(dst), AT, 0); + } + } + %} + + // Store Char (16bit unsigned) + enc_class store_C_reg_enc (memory mem, mRegI src) %{ + MacroAssembler _masm(&cbuf); + int src = $src$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gsshx(as_Register(src), as_Register(base), 
as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsshx(as_Register(src), as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ sh(as_Register(src), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gsshx(as_Register(src), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sh(as_Register(src), AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sh(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gsshx(as_Register(src), as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ sh(as_Register(src), AT, 0); + } + } + } + %} + + enc_class store_C0_enc (memory mem) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gsshx(R0, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsshx(R0, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ sh(R0, AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gsshx(R0, AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sh(R0, AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sh(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gsshx(R0, as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ sh(R0, AT, 0); + } + } + } + %} + + enc_class load_I_enc (mRegI dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gslwx(as_Register(dst), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gslwx(as_Register(dst), as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ lw(as_Register(dst), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gslwx(as_Register(dst), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ lw(as_Register(dst), AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lw(as_Register(dst), as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gslwx(as_Register(dst), as_Register(base), T9, 0); + 
} else { + __ addu(AT, as_Register(base), T9); + __ lw(as_Register(dst), AT, 0); + } + } + } + %} + + enc_class store_I_reg_enc (memory mem, mRegI src) %{ + MacroAssembler _masm(&cbuf); + int src = $src$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsswx(as_Register(src), as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ sw(as_Register(src), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gsswx(as_Register(src), AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sw(as_Register(src), AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sw(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gsswx(as_Register(src), as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ sw(as_Register(src), AT, 0); + } + } + } + %} + + enc_class store_I_immI_enc (memory mem, immI src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + int value = $src$$constant; + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if ( scale == 0 ) { + if ( value == 0 ) { + __ gsswx(R0, as_Register(base), as_Register(index), disp); + } else { + __ move(T9, value); + __ gsswx(T9, as_Register(base), as_Register(index), disp); + } + } else { + __ dsll(AT, as_Register(index), scale); + if ( value == 0 ) { + __ gsswx(R0, as_Register(base), AT, disp); + } else { + __ move(T9, value); + __ gsswx(T9, as_Register(base), AT, disp); + } + } + } else if ( Assembler::is_simm16(disp) ) { + if ( scale == 0 ) { + __ daddu(AT, as_Register(base), as_Register(index)); + if ( value == 0 ) { + __ sw(R0, AT, disp); + } else { + __ move(T9, value); + __ sw(T9, AT, disp); + } + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + if ( value == 0 ) { + __ sw(R0, AT, disp); + } else { + __ move(T9, value); + __ sw(T9, AT, disp); + } + } + } else { + if ( scale == 0 ) { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + if ( value ==0 ) { + __ gsswx(R0, as_Register(base), AT, 0); + } else { + __ move(T9, value); + __ gsswx(T9, as_Register(base), AT, 0); + } + } else { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + if ( value == 0 ) { + __ gsswx(R0, as_Register(base), AT, 0); + } else { + __ move(T9, value); + __ gsswx(T9, as_Register(base), AT, 0); + } + } + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sw(R0, AT, disp); + } else { + __ move(T9, value); + __ sw(T9, AT, disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sw(R0, 
AT, 0); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ move(T9, value); + __ sw(T9, AT, 0); + } + } + } + } else { + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + if ( value == 0 ) { + __ sw(R0, as_Register(base), disp); + } else { + __ move(AT, value); + __ sw(AT, as_Register(base), disp); + } + } else { + __ move(T9, disp); + if ( value == 0 ) { + __ gsswx(R0, as_Register(base), T9, 0); + } else { + __ move(AT, value); + __ gsswx(AT, as_Register(base), T9, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + if (value == 0) { + __ sw(R0, as_Register(base), disp); + } else { + __ move(AT, value); + __ sw(AT, as_Register(base), disp); + } + } else { + if (value == 0) { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sw(R0, AT, 0); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ move(T9, value); + __ sw(T9, AT, 0); + } + } + } + } + %} + + enc_class load_N_enc (mRegN dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ lwu(as_Register(dst), AT, disp); + } else { + __ set64(T9, disp); + __ daddu(AT, AT, T9); + __ lwu(as_Register(dst), AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lwu(as_Register(dst), as_Register(base), disp); + } else { + __ set64(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ lwu(as_Register(dst), AT, 0); + } + } + %} + + + enc_class load_P_enc (mRegP dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ gsldx(as_Register(dst), as_Register(base), AT, disp); + } else { + __ gsldx(as_Register(dst), as_Register(base), as_Register(index), disp); + } + } else if ( Assembler::is_simm16(disp) ){ + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, AT, as_Register(base)); + } else { + __ daddu(AT, as_Register(index), as_Register(base)); + } + __ ld(as_Register(dst), AT, disp); + } else { + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + } + __ gsldx(as_Register(dst), as_Register(base), AT, 0); + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ ld(as_Register(dst), AT, disp); + } else { + __ set64(T9, disp); + __ daddu(AT, AT, T9); + __ ld(as_Register(dst), AT, 0); + } + } + } else { + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ){ + __ ld(as_Register(dst), as_Register(base), disp); + } else { + __ set64(T9, disp); + __ gsldx(as_Register(dst), as_Register(base), T9, 0); 
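+        // Note: disp did not fit ld's signed 16-bit immediate, so it was
+        // materialized into T9 above and the Loongson-extension gsldx folds
+        // the base + T9 addition into the load.  Illustrative sequence for a
+        // large disp such as 0x12345678 (the exact set64 expansion may vary
+        // with the constant):
+        //   lui/ori/dsll/ori  T9, 0x12345678    # set64(T9, disp)
+        //   gsldx             dst, base, T9, 0  # dst = *(base + T9)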
+ } + } else { //not use loongson isa + if( Assembler::is_simm16(disp) ) { + __ ld(as_Register(dst), as_Register(base), disp); + } else { + __ set64(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ ld(as_Register(dst), AT, 0); + } + } + } + %} + + // Load acquire. + // load_P_enc + sync + enc_class load_P_enc_ac (mRegP dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int dst = $dst$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + relocInfo::relocType disp_reloc = $mem->disp_reloc(); + assert(disp_reloc == relocInfo::none, "cannot have disp"); + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ gsldx(as_Register(dst), as_Register(base), AT, disp); + } else { + __ gsldx(as_Register(dst), as_Register(base), as_Register(index), disp); + } + } else if ( Assembler::is_simm16(disp) ){ + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, AT, as_Register(base)); + } else { + __ daddu(AT, as_Register(index), as_Register(base)); + } + __ ld(as_Register(dst), AT, disp); + } else { + if ( scale != 0 ) { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + } + __ gsldx(as_Register(dst), as_Register(base), AT, 0); + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ ld(as_Register(dst), AT, disp); + } else { + __ set64(T9, disp); + __ daddu(AT, AT, T9); + __ ld(as_Register(dst), AT, 0); + } + } + } else { + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ){ + __ ld(as_Register(dst), as_Register(base), disp); + } else { + __ set64(T9, disp); + __ gsldx(as_Register(dst), as_Register(base), T9, 0); + } + } else { //not use loongson isa + if( Assembler::is_simm16(disp) ) { + __ ld(as_Register(dst), as_Register(base), disp); + } else { + __ set64(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ ld(as_Register(dst), AT, 0); + } + } + } + __ sync(); + %} + + enc_class store_P_reg_enc (memory mem, mRegP src) %{ + MacroAssembler _masm(&cbuf); + int src = $src$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (UseLEXT1){ + if ( Assembler::is_simm(disp, 8) ) { + if ( scale == 0 ) { + __ gssdx(as_Register(src), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gssdx(as_Register(src), as_Register(base), AT, disp); + } + } else if ( Assembler::is_simm16(disp) ) { + if ( scale == 0 ) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ sd(as_Register(src), AT, disp); + } else { + if ( scale == 0 ) { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + } else { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + } + __ gssdx(as_Register(src), as_Register(base), AT, 0); + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ 
sd(as_Register(src), AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sd(as_Register(src), AT, 0); + } + } + } else { + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + __ sd(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + __ gssdx(as_Register(src), as_Register(base), T9, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sd(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sd(as_Register(src), AT, 0); + } + } + } + %} + + enc_class store_N_reg_enc (memory mem, mRegN src) %{ + MacroAssembler _masm(&cbuf); + int src = $src$$reg; + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (UseLEXT1){ + if ( Assembler::is_simm(disp, 8) ) { + if ( scale == 0 ) { + __ gsswx(as_Register(src), as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsswx(as_Register(src), as_Register(base), AT, disp); + } + } else if ( Assembler::is_simm16(disp) ) { + if ( scale == 0 ) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ sw(as_Register(src), AT, disp); + } else { + if ( scale == 0 ) { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + } else { + __ dsll(AT, as_Register(index), scale); + __ move(T9, disp); + __ daddu(AT, AT, T9); + } + __ gsswx(as_Register(src), as_Register(base), AT, 0); + } + } else { //not use loongson isa + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ sw(as_Register(src), AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sw(as_Register(src), AT, 0); + } + } + } else { + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + __ sw(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + __ gsswx(as_Register(src), as_Register(base), T9, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sw(as_Register(src), as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sw(as_Register(src), AT, 0); + } + } + } + %} + + enc_class store_P_immP0_enc (memory mem) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + if ( Assembler::is_simm16(disp) ) { + if (UseLEXT1 && Assembler::is_simm(disp, 8)) { + __ gssdx(R0, as_Register(base), as_Register(index), disp); + } else { + __ daddu(AT, as_Register(base), as_Register(index)); + __ sd(R0, AT, disp); + } + } else { + __ daddu(AT, as_Register(base), as_Register(index)); + __ move(T9, disp); + if (UseLEXT1) { + __ gssdx(R0, AT, T9, 0); + } else { + __ daddu(AT, AT, T9); + __ sd(R0, AT, 0); + } + } + } else { + __ dsll(AT, as_Register(index), scale); + if( Assembler::is_simm16(disp) ) { + if (UseLEXT1 && Assembler::is_simm(disp, 8)) { + __ gssdx(R0, as_Register(base), AT, disp); + } else { + __ daddu(AT, as_Register(base), AT); + __ sd(R0, AT, disp); + } + } else { + __ daddu(AT, as_Register(base), AT); + __ move(T9, disp); + if (UseLEXT1) { + __ gssdx(R0, AT, T9, 0); + } else { + __ daddu(AT, AT, T9); + __ sd(R0, AT, 0); + } + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ 
sd(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gssdx(R0, as_Register(base), T9, 0); + } else { + __ daddu(AT, as_Register(base), T9); + __ sd(R0, AT, 0); + } + } + } + %} + + enc_class storeImmN0_enc(memory mem, ImmN0 src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if(index!=0){ + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + + if( Assembler::is_simm16(disp) ) { + __ sw(R0, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sw(R0, AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sw(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sw(R0, AT, 0); + } + } + %} + + enc_class load_L_enc (mRegL dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + Register dst_reg = as_Register($dst$$reg); + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ ld(dst_reg, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ ld(dst_reg, AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ ld(dst_reg, as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ ld(dst_reg, AT, 0); + } + } + %} + + enc_class store_L_reg_enc (memory mem, mRegL src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + Register src_reg = as_Register($src$$reg); + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ sd(src_reg, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sd(src_reg, AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sd(src_reg, as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sd(src_reg, AT, 0); + } + } + %} + + enc_class store_L_immL_0_enc (memory mem, immL_0 src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ sd(R0, AT, disp); + } else { + __ move(T9, disp); + __ addu(AT, AT, T9); + __ sd(R0, AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sd(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + __ addu(AT, as_Register(base), T9); + __ sd(R0, AT, 0); + } + } + %} + + enc_class store_L_immL_enc (memory mem, immL src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + long imm = $src$$constant; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, 
as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + if( Assembler::is_simm16(disp) ) { + __ set64(T9, imm); + __ sd(T9, AT, disp); + } else { + __ move(T9, disp); + __ addu(AT, AT, T9); + __ set64(T9, imm); + __ sd(T9, AT, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ move(AT, as_Register(base)); + __ set64(T9, imm); + __ sd(T9, AT, disp); + } else { + __ move(T9, disp); + __ addu(AT, as_Register(base), T9); + __ set64(T9, imm); + __ sd(T9, AT, 0); + } + } + %} + + enc_class load_F_enc (regF dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + FloatRegister dst = $dst$$FloatRegister; + + if( index != 0 ) { + if( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gslwxc1(dst, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gslwxc1(dst, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ lwc1(dst, AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gslwxc1(dst, AT, T9, 0); + } else { + __ daddu(AT, AT, T9); + __ lwc1(dst, AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ lwc1(dst, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gslwxc1(dst, as_Register(base), T9, 0); + } else { + __ daddu(AT, as_Register(base), T9); + __ lwc1(dst, AT, 0); + } + } + } + %} + + enc_class store_F_reg_enc (memory mem, regF src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + FloatRegister src = $src$$FloatRegister; + + if( index != 0 ) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gsswxc1(src, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsswxc1(src, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ swc1(src, AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gsswxc1(src, AT, T9, 0); + } else { + __ daddu(AT, AT, T9); + __ swc1(src, AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ swc1(src, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gsswxc1(src, as_Register(base), T9, 0); + } else { + __ daddu(AT, as_Register(base), T9); + __ swc1(src, AT, 0); + } + } + } + %} + + enc_class load_D_enc (regD dst, memory mem) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + + if ( index != 0 ) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ 
gsldxc1(dst_reg, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gsldxc1(dst_reg, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ ldc1(dst_reg, AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gsldxc1(dst_reg, AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ ldc1(dst_reg, AT, 0); + } + } + } else { + if( Assembler::is_simm16(disp) ) { + __ ldc1(dst_reg, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gsldxc1(dst_reg, as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ ldc1(dst_reg, AT, 0); + } + } + } + %} + + enc_class store_D_reg_enc (memory mem, regD src) %{ + MacroAssembler _masm(&cbuf); + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + FloatRegister src_reg = as_FloatRegister($src$$reg); + + if ( index != 0 ) { + if ( Assembler::is_simm16(disp) ) { + if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gssdxc1(src_reg, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gssdxc1(src_reg, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ sdc1(src_reg, AT, disp); + } + } else { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gssdxc1(src_reg, AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ sdc1(src_reg, AT, 0); + } + } + } else { + if ( Assembler::is_simm16(disp) ) { + __ sdc1(src_reg, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gssdxc1(src_reg, as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ sdc1(src_reg, AT, 0); + } + } + } + %} + + enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf + MacroAssembler _masm(&cbuf); + // This is the instruction starting address for relocation info. + __ block_comment("Java_To_Runtime"); + cbuf.set_insts_mark(); + __ relocate(relocInfo::runtime_call_type); + __ patchable_call((address)$meth$$method); + %} + + enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine + // who we intended to call. + MacroAssembler _masm(&cbuf); + address addr = (address)$meth$$method; + address call; + __ block_comment("Java_Static_Call"); + + if ( !_method ) { + // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 
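+    // The relocation type chosen below tells the rest of the VM how this call
+    // site may later be patched: runtime_call_type when there is no _method
+    // (runtime stubs), opt_virtual_call_type for statically bound virtual
+    // calls, and static_call_type otherwise.  trampoline_call returns NULL
+    // when the code cache is full, in which case the compile bails out via
+    // record_failure; the same applies to the to-interpreter stub emitted for
+    // real Java targets further down.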
+ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); + } else if(_optimized_virtual) { + call = __ trampoline_call(AddressLiteral(addr, relocInfo::opt_virtual_call_type), &cbuf); + } else { + call = __ trampoline_call(AddressLiteral(addr, relocInfo::static_call_type), &cbuf); + } + + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + + if( _method ) { // Emit stub for static call + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + + // + // [Ref: LIR_Assembler::ic_call() ] + // + enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL + MacroAssembler _masm(&cbuf); + __ block_comment("Java_Dynamic_Call"); + __ ic_call((address)$meth$$method); + %} + + + enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ + Register result = $result$$Register; + Register sub = $sub$$Register; + Register super = $super$$Register; + Register length = $tmp$$Register; + Register tmp = T9; + Label miss; + + // result may be the same as sub + // 47c B40: # B21 B41 <- B20 Freq: 0.155379 + // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 + // 4bc mov S2, NULL #@loadConP + // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 + // + MacroAssembler _masm(&cbuf); + Label done; + __ check_klass_subtype_slow_path(sub, super, length, tmp, + NULL, &miss, + /*set_cond_codes:*/ true); + // Refer to X86_64's RDI + __ move(result, 0); + __ b(done); + __ delayed()->nop(); + + __ bind(miss); + __ move(result, 1); + __ bind(done); + %} + +%} + + +//---------MIPS FRAME-------------------------------------------------------------- +// Definition of frame structure and management information. +// +// S T A C K L A Y O U T Allocators stack-slot number +// | (to get allocators register number +// G Owned by | | v add SharedInfo::stack0) +// r CALLER | | +// o | +--------+ pad to even-align allocators stack-slot +// w V | pad0 | numbers; owned by CALLER +// t -----------+--------+----> Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | old | | 3 +// | | SP-+--------+----> Matcher::_old_SP, even aligned +// v | | ret | 3 return address +// Owned by +--------+ +// Self | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> SharedInfo::stack0, even aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by new | | +// Callee SP-+--------+----> Matcher::_new_SP, even aligned +// | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be nessecary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. Holes in the outgoing arguments may be nessecary for +// varargs C calling conventions. 
+// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. +// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack +// alignment. Region 11, pad1, may be dynamically extended so that +// SP meets the minimum alignment. + + +frame %{ + + stack_direction(TOWARDS_LOW); + + // These two registers define part of the calling convention + // between compiled code and the interpreter. + // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention + // for more information. + + inline_cache_reg(T1); // Inline Cache Register + interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter + + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset32); + + // Number of stack slots consumed by locking an object + // generate Compile::sync_stack_slots + sync_stack_slots(2); + + frame_pointer(SP); + + // Interpreter stores its frame pointer in a register which is + // stored to the stack by I2CAdaptors. + // I2CAdaptors convert from interpreted java to compiled java. + + interpreter_frame_pointer(FP); + + // generate Matcher::stack_alignment + stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); + + // Number of stack slots between incoming argument block and the start of + // a new frame. The PROLOG must add this many slots to the stack. The + // EPILOG must remove this many slots. + in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + varargs_C_out_slots_killed(0); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. + // Otherwise, it is above the locks and verification slot and alignment word + //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); + return_addr(REG RA); + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) + // StartNode::calling_convention call this. + calling_convention %{ + SharedRuntime::java_calling_convention(sig_bt, regs, length, false); + %} + + + + + // Body of function which returns an integer array locating + // arguments either in registers or in stack slots. Passed an array + // of ideal registers called "sig" and a "length" count. Stack-slot + // offsets are based on outgoing arguments, i.e. a CALLER setting up + // arguments for a CALLEE. 
Incoming stack arguments are + // automatically biased by the preserve_stack_slots field above. + + + // SEE CallRuntimeNode::calling_convention for more information. + c_calling_convention %{ + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); + %} + + + // Location of C & interpreter return values + // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. + // SEE Matcher::match. + c_return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + + // Location of return values + // register(s) contain(s) return value for Op_StartC2I and Op_Start. + // SEE Matcher::match. + + return_value %{ + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ + static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; + return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); + %} + +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(0); // Required cost attribute + +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(100); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_pc_relative(0); // Required PC Relative flag +ins_attrib ins_short_branch(0); // Required flag: is this instruction a + // non-matching short branch variant of some + // long branch? +ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) + // specifies the alignment that some part of the instruction (not + // necessarily the start) requires. If > 1, a compute_padding() + // function must be provided for the instruction + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +// Vectors +operand vecD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(VecD); + + format %{ %} + interface(REG_INTER); +%} + +// Flags register, used as output of compare instructions +operand FlagsReg() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegFlags); + + format %{ "T0" %} + interface(REG_INTER); +%} + +//----------Simple Operands---------------------------------------------------- +// TODO: Should we need to define some more special immediate number ? 
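The return_value and c_return_value blocks above pick the return location purely by indexing two small tables with the ideal register class of the return type: the low half is always V0 for integer/pointer/long values and F0 for float/double, and the hi entry only supplies the paired slot (V0_H/F0_H) for the 64-bit classes RegP, RegD and RegL. A minimal standalone C++ sketch of that lookup follows; the enum values and register numbers are illustrative stand-ins, not the real ADLC-generated constants.

  #include <cstdio>
  #include <utility>

  // Illustrative stand-ins for the ideal register classes and for the
  // allocator numbers of V0/V0_H and F0/F0_H; the real values come from
  // the ADLC-generated code, not from this sketch.
  enum IdealReg { Op_RegN = 2, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL };
  enum { Bad = -1, V0_num = 4, V0_H_num = 5, F0_num = 64, F0_H_num = 65 };

  // Mirrors the lo[]/hi[] tables in the return_value block: the hi register
  // is only meaningful for the 64-bit classes RegP, RegD and RegL.
  static std::pair<int, int> return_value_pair(int ideal_reg) {
    static const int lo[Op_RegL + 1] = { 0, 0, V0_num, V0_num, V0_num,   F0_num, F0_num,   V0_num };
    static const int hi[Op_RegL + 1] = { 0, 0, Bad,    Bad,    V0_H_num, Bad,    F0_H_num, V0_H_num };
    return std::make_pair(hi[ideal_reg], lo[ideal_reg]);  // (hi, lo), like OptoRegPair
  }

  int main() {
    std::pair<int, int> i = return_value_pair(Op_RegI);  // (Bad, V0): an int needs only V0
    std::pair<int, int> l = return_value_pair(Op_RegL);  // (V0_H, V0): a long takes the V0 pair
    std::pair<int, int> d = return_value_pair(Op_RegD);  // (F0_H, F0): a double takes the F0 pair
    std::printf("RegI=(%d,%d) RegL=(%d,%d) RegD=(%d,%d)\n",
                i.first, i.second, l.first, l.second, d.first, d.second);
    return 0;
  }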
+// Immediate Operands +// Integer Immediate +operand immI() %{ + match(ConI); + // TODO: should not match immI8 here LEE + match(immI8); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +operand immI8() %{ + predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI16() %{ + predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); + match(ConI); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M65536() %{ + predicate(n->get_int() == -65536); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for decrement +operand immI_M1() %{ + predicate(n->get_int() == -1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for test vs zero +operand immI_0() %{ + predicate(n->get_int() == 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for increment +operand immI_1() %{ + predicate(n->get_int() == 1); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constants for increment +operand immI_16() %{ + predicate(n->get_int() == 16); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_24() %{ + predicate(n->get_int() == 24); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +// Constant for long shifts +operand immI_32() %{ + predicate(n->get_int() == 32); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Constant for byte-wide masking +operand immI_255() %{ + predicate(n->get_int() == 255); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_65535() %{ + predicate(n->get_int() == 65535); + match(ConI); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_MaxI() %{ + predicate(n->get_int() == 2147483647); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_M32767_32768() %{ + predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); + match(ConI); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Valid scale values for addressing modes +operand immI_0_3() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 3)); + match(ConI); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_31() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 31); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_32767() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 32767); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_0_65535() %{ + predicate(n->get_int() >= 0 && n->get_int() <= 65535); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immI_32_63() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive integer mask +operand immI_nonneg_mask() %{ + predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate +operand immL() %{ + match(ConL); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 8-bit +operand immL8() %{ + predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); + match(ConL); + + op_cost(5); + format %{ %} + 
interface(CONST_INTER); +%} + +operand immL16() %{ + predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate 32-bit signed +operand immL32() %{ + predicate(n->get_long() == (int)(n->get_long())); + match(ConL); + + op_cost(15); + format %{ %} + interface(CONST_INTER); +%} + +// bit 3..6 zero +operand immL_M121() %{ + predicate(n->get_long() == -121L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..2 zero +operand immL_M8() %{ + predicate(n->get_long() == -8L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 1..2 zero +operand immL_M7() %{ + predicate(n->get_long() == -7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 2 zero +operand immL_M5() %{ + predicate(n->get_long() == -5L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// bit 0..1 zero +operand immL_M4() %{ + predicate(n->get_long() == -4L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_M1() %{ + predicate(n->get_long() == -1L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate zero +operand immL_0() %{ + predicate(n->get_long() == 0L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_7() %{ + predicate(n->get_long() == 7L); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: low 32-bit mask +operand immL_MaxUI() %{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(20); + + format %{ %} + interface(CONST_INTER); +%} + +operand immL_M32767_32768() %{ + predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); + match(ConL); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immL_0_65535() %{ + predicate(n->get_long() >= 0 && n->get_long() <= 65535); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Operand for non-negtive long mask +operand immL_nonneg_mask() %{ + predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); + match(ConL); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immP() %{ + match(ConP); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immP_0() %{ + predicate(n->get_ptr() == 0); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate: 64-bit +operand immP_no_oop_cheap() %{ + predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer for polling page +operand immP_poll() %{ + predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); + match(ConP); + op_cost(5); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immN() %{ + match(ConN); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() %{ + match(ConNKlass); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immN_0() %{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// 
Single-precision floating-point immediate +operand immF() %{ + match(ConF); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Single-precision floating-point zero +operand immF_0() %{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point immediate +operand immD() %{ + match(ConD); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Double-precision floating-point zero +operand immD_0() %{ + predicate(jlong_cast(n->getd()) == 0); + match(ConD); + + op_cost(5); + format %{ %} + interface(CONST_INTER); +%} + +// Register Operands +// Integer Register +operand mRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegI); + + format %{ %} + interface(REG_INTER); +%} + +operand no_Ax_mRegI() %{ + constraint(ALLOC_IN_RC(no_Ax_int_reg)); + match(RegI); + match(mRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand mS0RegI() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegI); + match(mRegI); + + format %{ "S0" %} + interface(REG_INTER); +%} + +operand mS1RegI() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegI); + match(mRegI); + + format %{ "S1" %} + interface(REG_INTER); +%} + +operand mS2RegI() %{ + constraint(ALLOC_IN_RC(s2_reg)); + match(RegI); + match(mRegI); + + format %{ "S2" %} + interface(REG_INTER); +%} + +operand mS3RegI() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegI); + match(mRegI); + + format %{ "S3" %} + interface(REG_INTER); +%} + +operand mS4RegI() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegI); + match(mRegI); + + format %{ "S4" %} + interface(REG_INTER); +%} + +operand mS5RegI() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegI); + match(mRegI); + + format %{ "S5" %} + interface(REG_INTER); +%} + +operand mS6RegI() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegI); + match(mRegI); + + format %{ "S6" %} + interface(REG_INTER); +%} + +operand mS7RegI() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegI); + match(mRegI); + + format %{ "S7" %} + interface(REG_INTER); +%} + + +operand mT0RegI() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegI); + match(mRegI); + + format %{ "T0" %} + interface(REG_INTER); +%} + +operand mT1RegI() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegI); + match(mRegI); + + format %{ "T1" %} + interface(REG_INTER); +%} + +operand mT2RegI() %{ + constraint(ALLOC_IN_RC(t2_reg)); + match(RegI); + match(mRegI); + + format %{ "T2" %} + interface(REG_INTER); +%} + +operand mT3RegI() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegI); + match(mRegI); + + format %{ "T3" %} + interface(REG_INTER); +%} + +operand mT8RegI() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegI); + match(mRegI); + + format %{ "T8" %} + interface(REG_INTER); +%} + +operand mT9RegI() %{ + constraint(ALLOC_IN_RC(t9_reg)); + match(RegI); + match(mRegI); + + format %{ "T9" %} + interface(REG_INTER); +%} + +operand mA0RegI() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegI); + match(mRegI); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand mA1RegI() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegI); + match(mRegI); + + format %{ "A1" %} + interface(REG_INTER); +%} + +operand mA2RegI() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegI); + match(mRegI); + + format %{ "A2" %} + interface(REG_INTER); +%} + +operand mA3RegI() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegI); + match(mRegI); + + format %{ "A3" %} + interface(REG_INTER); +%} + +operand mA4RegI() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegI); + 
match(mRegI); + + format %{ "A4" %} + interface(REG_INTER); +%} + +operand mA5RegI() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegI); + match(mRegI); + + format %{ "A5" %} + interface(REG_INTER); +%} + +operand mA6RegI() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegI); + match(mRegI); + + format %{ "A6" %} + interface(REG_INTER); +%} + +operand mA7RegI() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegI); + match(mRegI); + + format %{ "A7" %} + interface(REG_INTER); +%} + +operand mV0RegI() %{ + constraint(ALLOC_IN_RC(v0_reg)); + match(RegI); + match(mRegI); + + format %{ "V0" %} + interface(REG_INTER); +%} + +operand mV1RegI() %{ + constraint(ALLOC_IN_RC(v1_reg)); + match(RegI); + match(mRegI); + + format %{ "V1" %} + interface(REG_INTER); +%} + +operand mRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegN() %{ + constraint(ALLOC_IN_RC(t0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegN() %{ + constraint(ALLOC_IN_RC(t1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t2_RegN() %{ + constraint(ALLOC_IN_RC(t2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegN() %{ + constraint(ALLOC_IN_RC(t3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegN() %{ + constraint(ALLOC_IN_RC(t8_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand t9_RegN() %{ + constraint(ALLOC_IN_RC(t9_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegN() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegN() %{ + constraint(ALLOC_IN_RC(a1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegN() %{ + constraint(ALLOC_IN_RC(a2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegN() %{ + constraint(ALLOC_IN_RC(a3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a4_RegN() %{ + constraint(ALLOC_IN_RC(a4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a5_RegN() %{ + constraint(ALLOC_IN_RC(a5_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegN() %{ + constraint(ALLOC_IN_RC(a6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegN() %{ + constraint(ALLOC_IN_RC(a7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s0_RegN() %{ + constraint(ALLOC_IN_RC(s0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegN() %{ + constraint(ALLOC_IN_RC(s1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s2_RegN() %{ + constraint(ALLOC_IN_RC(s2_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s3_RegN() %{ + constraint(ALLOC_IN_RC(s3_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegN() %{ + constraint(ALLOC_IN_RC(s4_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegN() %{ + constraint(ALLOC_IN_RC(s5_reg)); + match(RegN); + match(mRegN); + + format %{ %} 
+ interface(REG_INTER); +%} + +operand s6_RegN() %{ + constraint(ALLOC_IN_RC(s6_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegN() %{ + constraint(ALLOC_IN_RC(s7_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegN() %{ + constraint(ALLOC_IN_RC(v0_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +operand v1_RegN() %{ + constraint(ALLOC_IN_RC(v1_reg)); + match(RegN); + match(mRegN); + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand mRegP() %{ + constraint(ALLOC_IN_RC(p_reg)); + match(RegP); + match(a0_RegP); + + format %{ %} + interface(REG_INTER); +%} + +operand no_T8_mRegP() %{ + constraint(ALLOC_IN_RC(no_T8_p_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s0_RegP() +%{ + constraint(ALLOC_IN_RC(s0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s1_RegP() +%{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s2_RegP() +%{ + constraint(ALLOC_IN_RC(s2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s3_RegP() +%{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s4_RegP() +%{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s5_RegP() +%{ + constraint(ALLOC_IN_RC(s5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s6_RegP() +%{ + constraint(ALLOC_IN_RC(s6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand s7_RegP() +%{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t0_RegP() +%{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t1_RegP() +%{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t2_RegP() +%{ + constraint(ALLOC_IN_RC(t2_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t3_RegP() +%{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t8_RegP() +%{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegP); + match(mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand t9_RegP() +%{ + constraint(ALLOC_IN_RC(t9_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a0_RegP() +%{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a1_RegP() +%{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a2_RegP() +%{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegP); + match(mRegP); + 
match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a3_RegP() +%{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a4_RegP() +%{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + + +operand a5_RegP() +%{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a6_RegP() +%{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand a7_RegP() +%{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v0_RegP() +%{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand v1_RegP() +%{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegP); + match(mRegP); + match(no_T8_mRegP); + + format %{ %} + interface(REG_INTER); +%} + +/* +operand mSPRegP(mRegP reg) %{ + constraint(ALLOC_IN_RC(sp_reg)); + match(reg); + + format %{ "SP" %} + interface(REG_INTER); +%} + +operand mFPRegP(mRegP reg) %{ + constraint(ALLOC_IN_RC(fp_reg)); + match(reg); + + format %{ "FP" %} + interface(REG_INTER); +%} +*/ + +operand mRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); + + format %{ %} + interface(REG_INTER); +%} + +operand v0RegL() %{ + constraint(ALLOC_IN_RC(v0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand v1RegL() %{ + constraint(ALLOC_IN_RC(v1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a0RegL() %{ + constraint(ALLOC_IN_RC(a0_long_reg)); + match(RegL); + match(mRegL); + + format %{ "A0" %} + interface(REG_INTER); +%} + +operand a1RegL() %{ + constraint(ALLOC_IN_RC(a1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a2RegL() %{ + constraint(ALLOC_IN_RC(a2_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a3RegL() %{ + constraint(ALLOC_IN_RC(a3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t0RegL() %{ + constraint(ALLOC_IN_RC(t0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t1RegL() %{ + constraint(ALLOC_IN_RC(t1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t2RegL() %{ + constraint(ALLOC_IN_RC(t2_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t3RegL() %{ + constraint(ALLOC_IN_RC(t3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand t8RegL() %{ + constraint(ALLOC_IN_RC(t8_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a4RegL() %{ + constraint(ALLOC_IN_RC(a4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a5RegL() %{ + constraint(ALLOC_IN_RC(a5_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand a6RegL() %{ + constraint(ALLOC_IN_RC(a6_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + 
+operand a7RegL() %{ + constraint(ALLOC_IN_RC(a7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s0RegL() %{ + constraint(ALLOC_IN_RC(s0_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s1RegL() %{ + constraint(ALLOC_IN_RC(s1_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s2RegL() %{ + constraint(ALLOC_IN_RC(s2_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s3RegL() %{ + constraint(ALLOC_IN_RC(s3_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s4RegL() %{ + constraint(ALLOC_IN_RC(s4_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand s7RegL() %{ + constraint(ALLOC_IN_RC(s7_long_reg)); + match(RegL); + match(mRegL); + + format %{ %} + interface(REG_INTER); +%} + +// Floating register operands +operand regF() %{ + constraint(ALLOC_IN_RC(flt_reg)); + match(RegF); + + format %{ %} + interface(REG_INTER); +%} + +//Double Precision Floating register operands +operand regD() %{ + constraint(ALLOC_IN_RC(dbl_reg)); + match(RegD); + + format %{ %} + interface(REG_INTER); +%} + +//----------Memory Operands---------------------------------------------------- +// Indirect Memory Operand +operand indirect(mRegP reg) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(reg); + + format %{ "[$reg] @ indirect" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset8(mRegP reg, immL8 off) +%{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg off); + + op_cost(10); + format %{ "[$reg + $off (8-bit)] @ indOffset8" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Times Scale Plus Index Register +operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) +%{ + predicate(UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP reg (LShiftL lreg scale)); + + op_cost(10); + format %{"[$reg + $lreg << $scale] @ indIndexScale" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale($scale); + disp(0x0); + %} +%} + + +// [base + index + offset] +operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) +%{ + predicate(UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(5); + match(AddP (AddP base index) off); + + format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} + interface(MEMORY_INTER) %{ + base($base); + index($index); + scale(0x0); + disp($off); + %} +%} + +// [base + index + offset] +operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) +%{ + predicate(UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(5); + match(AddP (AddP base (ConvI2L index)) off); + + format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8_convI2L" %} + interface(MEMORY_INTER) %{ + base($base); + index($index); + scale(0x0); + disp($off); + %} +%} + +// [base + index<<scale + offset] +operand basePosIndexScaleOffset8(mRegP base, mRegI index, immL8 off, immI_0_3 scale) +%{ + constraint(ALLOC_IN_RC(p_reg)); + predicate(UseLEXT1 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); + op_cost(10); + match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off); + + format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} + interface(MEMORY_INTER) %{ + base($base); + index($index); + scale($scale); + disp($off); + %} +%} + +//FIXME: I think it's better to limit the immI to be 16-bit at most!
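Each memory operand above only captures a (base, index, scale, disp) tuple; it is the enc_class bodies earlier in the file (load_F_enc, store_D_reg_enc and friends) that fold the tuple back into an effective address, spilling through the AT and T9 scratch registers whenever the displacement does not fit the 16-bit signed immediate of a plain load/store. The C++ sketch below models only that folding decision and deliberately omits the UseLEXT1 fused-load forms; the is_simm16 helper and the printed "plan" are illustrative, not the real MacroAssembler API.

  #include <cstdio>

  // A memory operand as the matcher hands it to an enc_class:
  // effective address = R[base] + (R[index] << scale) + disp.
  struct MemOperand { int base; int index; int scale; int disp; };

  // Same range test the encodings use before folding disp into the load.
  static bool is_simm16(int v) { return v >= -32768 && v <= 32767; }

  // Prints the address-formation steps instead of emitting instructions.
  static void plan_load(const MemOperand& m) {
    if (m.index != 0) {
      std::printf("AT = R%d + (R%d << %d); ", m.base, m.index, m.scale);
      if (is_simm16(m.disp)) {
        std::printf("load [AT + %d]\n", m.disp);                        // disp folds into the load
      } else {
        std::printf("T9 = %d; AT = AT + T9; load [AT + 0]\n", m.disp);  // disp materialized in T9
      }
    } else {
      if (is_simm16(m.disp)) {
        std::printf("load [R%d + %d]\n", m.base, m.disp);
      } else {
        std::printf("T9 = %d; AT = R%d + T9; load [AT + 0]\n", m.disp, m.base);
      }
    }
  }

  int main() {
    plan_load({ 5, 6, 3, 16 });      // indexed, small disp: one scratch add, folded disp
    plan_load({ 5, 0, 0, 0x12345 }); // no index, large disp: disp goes through T9
    return 0;
  }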
+// Indirect Memory Plus Long Offset Operand +operand indOffset32(mRegP reg, immL32 off) %{ + constraint(ALLOC_IN_RC(p_reg)); + op_cost(20); + match(AddP reg off); + + format %{ "[$reg + $off (32-bit)] @ indOffset32" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); /* NO_INDEX */ + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register +operand indIndex(mRegP addr, mRegL index) %{ + constraint(ALLOC_IN_RC(p_reg)); + match(AddP addr index); + + op_cost(20); + format %{"[$addr + $index] @ indIndex" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale(0x0); + disp(0x0); + %} +%} + +operand indirectNarrowKlass(mRegN reg) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(DecodeNKlass reg); + + format %{ "[$reg] @ indirectNarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +operand indOffset8NarrowKlass(mRegN reg, immL8 off) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeNKlass reg) off); + + format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +operand indOffset32NarrowKlass(mRegN reg, immL32 off) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeNKlass reg) off); + + format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) +%{ + predicate(UseLEXT1); + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP (AddP (DecodeNKlass reg) lreg) off); + + op_cost(10); + format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp($off); + %} +%} + +operand indIndexNarrowKlass(mRegN reg, mRegL lreg) +%{ + predicate(Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP (DecodeNKlass reg) lreg); + + op_cost(10); + format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Operand +operand indirectNarrow(mRegN reg) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(DecodeN reg); + + format %{ "[$reg] @ indirectNarrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Plus Short Offset Operand +operand indOffset8Narrow(mRegN reg, immL8 off) +%{ + predicate(Universe::narrow_oop_shift() == 0); + constraint(ALLOC_IN_RC(p_reg)); + op_cost(10); + match(AddP (DecodeN reg) off); + + format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($off); + %} +%} + +// Indirect Memory Plus Index Register Plus Offset Operand +operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) +%{ + predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); + constraint(ALLOC_IN_RC(p_reg)); + match(AddP (AddP (DecodeN reg) lreg) off); + + op_cost(10); + format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} + interface(MEMORY_INTER) %{ + base($reg); + index($lreg); + scale(0x0); + 
disp($off); + %} +%} + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. +// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. + +// Comparision Code +operand cmpOp() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + + +// Comparision Code +// Comparison Code, unsigned compare. Used by FP also, with +// C2 (unordered) turned into GT or LT already. The other bits +// C0 and C3 are turned into Carry & Zero flags. +operand cmpOpU() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x01); + not_equal(0x02); + greater(0x03); + greater_equal(0x04); + less(0x05); + less_equal(0x06); + overflow(0x7); + no_overflow(0x8); + %} +%} + + +//----------Special Memory Operands-------------------------------------------- +// Stack Slot Operand - This operand is used for loading and storing temporary +// values on the stack where a match requires a value to +// flow through memory. +operand stackSlotP(sRegP reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotI(sRegI reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotF(sRegF reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotD(sRegD reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + +operand stackSlotL(sRegL reg) %{ + constraint(ALLOC_IN_RC(stack_slots)); + // No match rule because this operand is only generated in matching + op_cost(50); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base(0x1d); // SP + index(0x0); // No Index + scale(0x0); // No Scale + disp($reg); // Stack Offset + %} +%} + + +//------------------------OPERAND CLASSES-------------------------------------- +//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); +opclass memory( 
indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); + + +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architectures pipeline. + +pipeline %{ + + //----------ATTRIBUTES--------------------------------------------------------- + attributes %{ + fixed_size_instructions; // Fixed size instructions + branch_has_delay_slot; // branch have delay slot in gs2 + max_instructions_per_bundle = 1; // 1 instruction per bundle + max_bundles_per_cycle = 4; // Up to 4 bundles per cycle + bundle_unit_size=4; + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 16; // The processor fetches one line + instruction_fetch_units = 1; // of 16 bytes + + // List of nop instructions + nops( MachNop ); + %} + + //----------RESOURCES---------------------------------------------------------- + // Resources are the functional units available to the machine + + resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); + + //----------PIPELINE DESCRIPTION----------------------------------------------- + // Pipeline Description specifies the stages in the machine's pipeline + + // IF: fetch + // ID: decode + // RD: read + // CA: caculate + // WB: write back + // CM: commit + + pipe_desc(IF, ID, RD, CA, WB, CM); + + + //----------PIPELINE CLASSES--------------------------------------------------- + // Pipeline Classes describe the stages in which input and output are + // referenced by the hardware pipeline. + + //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ + single_instruction; + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+1; + DECODE : ID; + ALU : CA; + %} + + //No.19 Integer mult operation : dst <-- reg1 mult reg2 + pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+5; + DECODE : ID; + ALU2 : CA; + %} + + pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer div operation : dst <-- reg1 div reg2 + pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.19 Integer mod operation : dst <-- reg1 mod reg2 + pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write)+10; + DECODE : ID; + ALU2 : CA; + %} + + //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 + pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ + instruction_count(2); + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ + instruction_count(2); + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //no.16 load Long from memory : + pipe_class ialu_loadL(mRegL dst, memory mem) %{ + instruction_count(2); + mem : RD(read); + dst : WB(write)+5; + DECODE : ID; + MEM : RD; + %} + + //No.17 Store Long to Memory : + pipe_class ialu_storeL(mRegL src, memory mem) %{ + instruction_count(2); + mem : RD(read); + src : RD(read); + DECODE : 
ID; + MEM : RD; + %} + + //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 + pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ + single_instruction; + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.3 Integer move operation : dst <-- reg + pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + ALU : CA; + %} + + //No.4 No instructions : do nothing + pipe_class empty( ) %{ + instruction_count(0); + %} + + //No.5 UnConditional branch : + pipe_class pipe_jump( label labl ) %{ + multiple_bundles; + DECODE : ID; + BR : RD; + %} + + //No.6 ALU Conditional branch : + pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + + //no.7 load integer from memory : + pipe_class ialu_loadI(mRegI dst, memory mem) %{ + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.8 Store Integer to Memory : + pipe_class ialu_storeI(mRegI src, memory mem) %{ + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + + //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 + pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + //No.22 Floating div operation : dst <-- reg1 div reg2 + pipe_class fpu_div(regF dst, regF src1, regF src2) %{ + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + pipe_class fcvt_I2D(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class fcvt_D2I(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU1 : CA; + %} + + pipe_class pipe_mfc1(mRegI dst, regD src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD; + %} + + pipe_class pipe_mtc1(regD dst, mRegI src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + MEM : RD(5); + %} + + //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 + pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + dst : WB(write); + DECODE : ID; + FPU2 : CA; + %} + + //No.11 Load Floating from Memory : + pipe_class fpu_loadF(regF dst, memory mem) %{ + instruction_count(1); + mem : RD(read); + dst : WB(write)+3; + DECODE : ID; + MEM : RD; + %} + + //No.12 Store Floating to Memory : + pipe_class fpu_storeF(regF src, memory mem) %{ + instruction_count(1); + mem : RD(read); + src : RD(read); + DECODE : ID; + MEM : RD; + %} + + //No.13 FPU Conditional branch : + pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ + multiple_bundles; + src1 : RD(read); + src2 : RD(read); + DECODE : ID; + BR : RD; + %} + +//No.14 Floating FPU reg operation : dst <-- op reg + pipe_class fpu1_regF(regF dst, regF src) %{ + src : RD(read); + dst : WB(write); + DECODE : ID; + FPU : CA; + %} + + pipe_class long_memory_op() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(30); + %} + + pipe_class simple_call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + BR : RD; + %} + + pipe_class call() %{ + instruction_count(10); multiple_bundles; force_serialization; + fixed_latency(200); + %} + + //FIXME: + //No.9 Piple slow : for multi-instructions + pipe_class pipe_slow( ) %{ + instruction_count(20); + force_serialization; + multiple_bundles; + fixed_latency(50); + %} + +%} + + + 
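The pipe_desc above names the six stages (IF, ID, RD, CA, WB, CM), and each pipe_class then records, per operand, the stage at which it is read or written plus any extra result latency (e.g. dst : WB(write)+5 for the multiply class). The rough C++ model below shows how such declarations can be turned into a producer-to-consumer stall; the stage numbering and the latency formula are simplifications for illustration, not the exact ADLC scheduling model.

  #include <algorithm>
  #include <cstdio>

  // The six stages from pipe_desc(IF, ID, RD, CA, WB, CM), numbered in order.
  enum Stage { IF, ID, RD, CA, WB, CM };

  // A pared-down pipe_class: where the result is written (plus extra cycles)
  // and where source operands are read.
  struct PipeClass {
    Stage write_stage;         // the stage in  dst : WB(write)
    int   extra_write_cycles;  // the "+N" in   dst : WB(write)+N
    Stage read_stage;          // the stage in  src : RD(read)
  };

  // Simplified rule: a consumer reading at RD must wait until the producer's
  // result becomes available at write_stage plus its extra cycles.
  static int dependence_latency(const PipeClass& producer, const PipeClass& consumer) {
    int available = static_cast<int>(producer.write_stage) + producer.extra_write_cycles;
    int needed    = static_cast<int>(consumer.read_stage);
    return std::max(1, available - needed);
  }

  int main() {
    PipeClass ialu_regI_regI = { WB, 1, RD };  // from:  dst : WB(write)+1
    PipeClass ialu_mult      = { WB, 5, RD };  // from:  dst : WB(write)+5
    std::printf("alu  -> alu latency ~%d cycles\n", dependence_latency(ialu_regI_regI, ialu_regI_regI));
    std::printf("mult -> alu latency ~%d cycles\n", dependence_latency(ialu_mult, ialu_regI_regI));
    return 0;
  }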
+//----------INSTRUCTIONS------------------------------------------------------- +// +// match -- States which machine-independent subtree may be replaced +// by this instruction. +// ins_cost -- The estimated cost of this instruction is used by instruction +// selection to identify a minimum cost tree of machine +// instructions that matches a tree of machine-independent +// instructions. +// format -- A string providing the disassembly for this instruction. +// The value of an instruction's operand may be inserted +// by referring to it with a '$' prefix. +// opcode -- Three instruction opcodes may be provided. These are referred +// to within an encode class as $primary, $secondary, and $tertiary +// respectively. The primary opcode is commonly used to +// indicate the type of machine instruction, while secondary +// and tertiary are often used for prefix options or addressing +// modes. +// ins_encode -- A list of encode classes with parameters. The encode class +// name must have been defined in an 'enc_class' specification +// in the encode section of the architecture description. + + +// Load Integer +instruct loadI(mRegI dst, memory mem) %{ + match(Set dst (LoadI mem)); + + ins_cost(125); + format %{ "lw $dst, $mem #@loadI" %} + ins_encode (load_I_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct loadI_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + + ins_cost(125); + format %{ "lw $dst, $mem #@loadI_convI2L" %} + ins_encode (load_I_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Integer (32 bit signed) to Byte (8 bit signed) +instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} + ins_encode(load_B_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) +instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} + ins_encode(load_UB_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Short (16 bit signed) +instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); + + ins_cost(125); + format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} + ins_encode(load_S_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) +instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + + ins_cost(125); + format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} + ins_encode(load_C_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +// Load Long. 
+instruct loadL(mRegL dst, memory mem) %{ +// predicate(!((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + + ins_cost(250); + format %{ "ld $dst, $mem #@loadL" %} + ins_encode(load_L_enc(dst, mem)); + ins_pipe( ialu_loadL ); +%} + +// Load Long - UNaligned +instruct loadL_unaligned(mRegL dst, memory mem) %{ + match(Set dst (LoadL_unaligned mem)); + + // FIXME: Need more effective ldl/ldr + ins_cost(450); + format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} + ins_encode(load_L_enc(dst, mem)); + ins_pipe( ialu_loadL ); +%} + +// Store Long +instruct storeL_reg(memory mem, mRegL src) %{ + match(Set mem (StoreL mem src)); + + ins_cost(200); + format %{ "sd $mem, $src #@storeL_reg\n" %} + ins_encode(store_L_reg_enc(mem, src)); + ins_pipe( ialu_storeL ); +%} + +instruct storeL_immL_0(memory mem, immL_0 zero) %{ + match(Set mem (StoreL mem zero)); + + ins_cost(180); + format %{ "sd zero, $mem #@storeL_immL_0" %} + ins_encode(store_L_immL_0_enc(mem, zero)); + ins_pipe( ialu_storeL ); +%} + +instruct storeL_imm(memory mem, immL src) %{ + match(Set mem (StoreL mem src)); + + ins_cost(200); + format %{ "sd $src, $mem #@storeL_imm" %} + ins_encode(store_L_immL_enc(mem, src)); + ins_pipe( ialu_storeL ); +%} + +// Load Compressed Pointer +instruct loadN(mRegN dst, memory mem) +%{ + match(Set dst (LoadN mem)); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2P(mRegP dst, memory mem) +%{ + match(Set dst (DecodeN (LoadN mem))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# @ loadN2P" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Pointer +instruct loadP(mRegP dst, memory mem) %{ + match(Set dst (LoadP mem)); + + ins_cost(125); + format %{ "ld $dst, $mem #@loadP" %} + ins_encode (load_P_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Klass Pointer +instruct loadKlass(mRegP dst, memory mem) %{ + match(Set dst (LoadKlass mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadKlass" %} + ins_encode (load_P_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load narrow Klass Pointer +instruct loadNKlass(mRegN dst, memory mem) +%{ + match(Set dst (LoadNKlass mem)); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadNKlass" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe( ialu_loadI ); // XXX +%} + +instruct loadN2PKlass(mRegP dst, memory mem) +%{ + match(Set dst (DecodeNKlass (LoadNKlass mem))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe( ialu_loadI ); // XXX +%} + +// Load Constant +instruct loadConI(mRegI dst, immI src) %{ + match(Set dst src); + + ins_cost(150); + format %{ "mov $dst, $src #@loadConI" %} + ins_encode %{ + Register dst = $dst$$Register; + int value = $src$$constant; + __ move(dst, value); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct loadConL_set64(mRegL dst, immL src) %{ + match(Set dst src); + ins_cost(120); + format %{ "li $dst, $src @ loadConL_set64" %} + ins_encode %{ + __ set64($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_regL_regL); +%} + +instruct loadConL16(mRegL dst, immL16 src) %{ + match(Set dst src); + ins_cost(105); + format %{ "mov $dst, 
$src #@loadConL16" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + int value = $src$$constant; + __ daddiu(dst_reg, R0, value); + %} + ins_pipe( ialu_regL_regL ); +%} + + +instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ + match(Set dst src); + ins_cost(100); + format %{ "mov $dst, zero #@loadConL_immL_0" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + __ daddu(dst_reg, R0, R0); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Load Range +instruct loadRange(mRegI dst, memory mem) %{ + match(Set dst (LoadRange mem)); + + ins_cost(125); + format %{ "MOV $dst,$mem @ loadRange" %} + ins_encode(load_I_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + + +instruct storeP(memory mem, mRegP src ) %{ + match(Set mem (StoreP mem src)); + + ins_cost(125); + format %{ "sd $src, $mem #@storeP" %} + ins_encode(store_P_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Store NULL Pointer, mark word, or other simple pointer constant. +instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ + match(Set mem (StoreP mem zero)); + + ins_cost(125); + format %{ "mov $mem, $zero #@storeImmP_immP_0" %} + ins_encode(store_P_immP0_enc(mem)); + ins_pipe( ialu_storeI ); +%} + +// Store Byte Immediate +instruct storeImmB(memory mem, immI8 src) %{ + match(Set mem (StoreB mem src)); + + ins_cost(150); + format %{ "movb $mem, $src #@storeImmB" %} + ins_encode(store_B_immI_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Store Compressed Pointer +instruct storeN(memory mem, mRegN src) +%{ + match(Set mem (StoreN mem src)); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} + ins_encode(store_N_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeP2N(memory mem, mRegP src) +%{ + match(Set mem (StoreN mem (EncodeP src))); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# @ storeP2N" %} + ins_encode(store_N_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeNKlass(memory mem, mRegN src) +%{ + match(Set mem (StoreNKlass mem src)); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} + ins_encode(store_N_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeP2NKlass(memory mem, mRegP src) +%{ + match(Set mem (StoreNKlass mem (EncodePKlass src))); + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + + ins_cost(125); // XXX + format %{ "sw $mem, $src\t# @ storeP2NKlass" %} + ins_encode(store_N_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeImmN_immN_0(memory mem, immN_0 zero) +%{ + match(Set mem (StoreN mem zero)); + + ins_cost(125); // XXX + format %{ "storeN0 zero, $mem\t# compressed ptr" %} + ins_encode(storeImmN0_enc(mem, zero)); + ins_pipe( ialu_storeI ); +%} + +// Store Byte +instruct storeB(memory mem, mRegI src) %{ + match(Set mem (StoreB mem src)); + + ins_cost(125); + format %{ "sb $src, $mem #@storeB" %} + ins_encode(store_B_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeB_convL2I(memory mem, mRegL src) %{ + match(Set mem (StoreB mem (ConvL2I src))); + + ins_cost(125); + format %{ "sb $src, $mem #@storeB_convL2I" %} + ins_encode(store_B_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Load Byte (8bit signed) +instruct loadB(mRegI dst, memory mem) %{ + match(Set dst (LoadB mem)); + + ins_cost(125); + format %{ "lb $dst, $mem #@loadB" %} + ins_encode(load_B_enc(dst, mem)); + 
ins_pipe( ialu_loadI ); +%} + +instruct loadB_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadB mem))); + + ins_cost(125); + format %{ "lb $dst, $mem #@loadB_convI2L" %} + ins_encode(load_B_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Byte (8bit UNsigned) +instruct loadUB(mRegI dst, memory mem) %{ + match(Set dst (LoadUB mem)); + + ins_cost(125); + format %{ "lbu $dst, $mem #@loadUB" %} + ins_encode(load_UB_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct loadUB_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUB mem))); + + ins_cost(125); + format %{ "lbu $dst, $mem #@loadUB_convI2L" %} + ins_encode(load_UB_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Short (16bit signed) +instruct loadS(mRegI dst, memory mem) %{ + match(Set dst (LoadS mem)); + + ins_cost(125); + format %{ "lh $dst, $mem #@loadS" %} + ins_encode(load_S_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Short (16 bit signed) to Byte (8 bit signed) +instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); + + ins_cost(125); + format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} + ins_encode(load_B_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +instruct loadS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadS mem))); + + ins_cost(125); + format %{ "lh $dst, $mem #@loadS_convI2L" %} + ins_encode(load_S_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Store Integer Immediate +instruct storeImmI(memory mem, immI src) %{ + match(Set mem (StoreI mem src)); + + ins_cost(150); + format %{ "mov $mem, $src #@storeImmI" %} + ins_encode(store_I_immI_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Store Integer +instruct storeI(memory mem, mRegI src) %{ + match(Set mem (StoreI mem src)); + + ins_cost(125); + format %{ "sw $mem, $src #@storeI" %} + ins_encode(store_I_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +instruct storeI_convL2I(memory mem, mRegL src) %{ + match(Set mem (StoreI mem (ConvL2I src))); + + ins_cost(125); + format %{ "sw $mem, $src #@storeI_convL2I" %} + ins_encode(store_I_reg_enc(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Load Float +instruct loadF(regF dst, memory mem) %{ + match(Set dst (LoadF mem)); + + ins_cost(150); + format %{ "loadF $dst, $mem #@loadF" %} + ins_encode(load_F_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct loadConP_general(mRegP dst, immP src) %{ + match(Set dst src); + + ins_cost(120); + format %{ "li $dst, $src #@loadConP_general" %} + + ins_encode %{ + Register dst = $dst$$Register; + long* value = (long*)$src$$constant; + + if($src->constant_reloc() == relocInfo::metadata_type){ + int klass_index = __ oop_recorder()->find_index((Klass*)value); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + + __ relocate(rspec); + __ patchable_set48(dst, (long)value); + } else if($src->constant_reloc() == relocInfo::oop_type){ + int oop_index = __ oop_recorder()->find_index((jobject)value); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + __ relocate(rspec); + __ patchable_set48(dst, (long)value); + } else if ($src->constant_reloc() == relocInfo::none) { + __ set64(dst, (long)value); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ + match(Set dst src); + + ins_cost(80); + format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} + + ins_encode %{ + __ set64($dst$$Register, $src$$constant); + %} + + 
ins_pipe(ialu_regI_regI); +%} + + +instruct loadConP_poll(mRegP dst, immP_poll src) %{ + match(Set dst src); + + ins_cost(50); + format %{ "li $dst, $src #@loadConP_poll" %} + + ins_encode %{ + Register dst = $dst$$Register; + intptr_t value = (intptr_t)$src$$constant; + + __ set64(dst, (jlong)value); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConP_immP_0(mRegP dst, immP_0 src) +%{ + match(Set dst src); + + ins_cost(50); + format %{ "mov $dst, R0\t# ptr" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + __ daddu(dst_reg, R0, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ + match(Set dst src); + format %{ "move $dst, R0\t# compressed NULL ptr" %} + ins_encode %{ + __ move($dst$$Register, R0); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct loadConN(mRegN dst, immN src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_oop(dst, (jobject)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +instruct loadConNKlass(mRegN dst, immNKlass src) %{ + match(Set dst src); + + ins_cost(125); + format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} + ins_encode %{ + Register dst = $dst$$Register; + __ set_narrow_klass(dst, (Klass*)$src$$constant); + %} + ins_pipe( ialu_regI_regI ); // XXX +%} + +//FIXME +// Tail Call; Jump from runtime stub to Java code. +// Also known as an 'interprocedural jump'. +// Target of jump will eventually return to caller. +// TailJump below removes the return address. +instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ + match(TailCall jump_target method_oop ); + ins_cost(300); + format %{ "JMP $jump_target \t# @TailCalljmpInd" %} + + ins_encode %{ + Register target = $jump_target$$Register; + Register oop = $method_oop$$Register; + + // RA will be used in generate_forward_exception() + __ push(RA); + + __ move(S3, oop); + __ jr(target); + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); +%} + +// Create exception oop: created by stack-crawling runtime code. +// Created exception is now available to this handler, and is setup +// just prior to jumping to this handler. No code emitted. +instruct CreateException( a0_RegP ex_oop ) +%{ + match(Set ex_oop (CreateEx)); + + // use the following format syntax + format %{ "# exception oop is in A0; no code emitted @CreateException" %} + ins_encode %{ + // X86 leaves this function empty + __ block_comment("CreateException is empty in MIPS"); + %} + ins_pipe( empty ); +// ins_pipe( pipe_jump ); +%} + + +/* The mechanism of exception handling is clear now. + +- Common try/catch: + [stubGenerator_mips.cpp] generate_forward_exception() + |- V0, V1 are created + |- T9 <= SharedRuntime::exception_handler_for_return_address + `- jr T9 + `- the caller's exception_handler + `- jr OptoRuntime::exception_blob + `- here +- Rethrow(e.g. 
'unwind'): + * The callee: + |- an exception is triggered during execution + `- exits the callee method through RethrowException node + |- The callee pushes exception_oop(T0) and exception_pc(RA) + `- The callee jumps to OptoRuntime::rethrow_stub() + * In OptoRuntime::rethrow_stub: + |- The VM calls _rethrow_Java to determine the return address in the caller method + `- exits the stub with tailjmpInd + |- pops exception_oop(V0) and exception_pc(V1) + `- jumps to the return address(usually an exception_handler) + * The caller: + `- continues processing the exception_blob with V0/V1 +*/ + +// Rethrow exception: +// The exception oop will come in the first argument position. +// Then JUMP (not call) to the rethrow stub code. +instruct RethrowException() +%{ + match(Rethrow); + + // use the following format syntax + format %{ "JMP rethrow_stub #@RethrowException" %} + ins_encode %{ + __ block_comment("@ RethrowException"); + + cbuf.set_insts_mark(); + cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); + + // call OptoRuntime::rethrow_stub to get the exception handler in parent method + __ patchable_jump((address)OptoRuntime::rethrow_stub()); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Branch Instructions --- long offset versions + +// Jump Direct +instruct jmpDir_long(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_long" %} + + ins_encode %{ + Label* L = $labl$$label; + __ jmp_far(*L); + %} + + ins_pipe( pipe_jump ); + //ins_pc_relative(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ slt(AT, op2, op1); + __ bne_long(AT, R0, *L); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ slt(AT, op1, op2); + __ bne_long(AT, R0, *L); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = AT; + Label* L = $labl$$label; + int flag = $cop$$cmpcode; + + __ move(op2, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ slt(AT, op2, op1); + __ bne_long(AT, R0, *L); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ slt(AT, op1, op2); + __ bne_long(AT, R0, *L); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + 
%} + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} + + ins_encode %{ + Label* L = $labl$$label; + switch($cop$$cmpcode) { + case 0x01: //equal + __ bne_long($cr$$Register, R0, *L); + break; + case 0x02: //not equal + __ beq_long($cr$$Register, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); +%} + +// Conditional jumps +instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) + { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! 
compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_long" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + default: + Unimplemented(); + } + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_long" %} + ins_encode %{ + Register op1_reg = $op1$$Register; + Register op2_reg = $op2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1_reg, op2_reg, *L); + break; + case 0x02: //not_equal + __ bne_long(op1_reg, op2_reg, *L); + break; + case 0x03: //above + __ sltu(AT, op2_reg, op1_reg); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1_reg, op2_reg); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1_reg, op2_reg); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ sltu(AT, op2_reg, op1_reg); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + __ bne_long(AT, R0, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + __ bne_long(AT, R0, *L); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, AT, *L); + break; + case 0x03: //above + __ sltu(AT, AT, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltu(AT, op1, AT); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltu(AT, op1, AT); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ sltu(AT, AT, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl 
#@branchConI_reg_reg_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, op2, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, op2, *L); + break; + case 0x03: //above + __ slt(AT, op2, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ slt(AT, op1, op2); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, R0, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, R0, *L); + break; + case 0x03: //greater + __ slt(AT, R0, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //greater_equal + __ slt(AT, op1, R0); + __ beq_long(AT, R0, *L); + break; + case 0x05: //less + __ slt(AT, op1, R0); + __ bne_long(R0, AT, *L); + break; + case 0x06: //less_equal + __ slt(AT, R0, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, AT, *L); + break; + case 0x03: //greater + __ slt(AT, AT, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //greater_equal + __ slt(AT, op1, AT); + __ beq_long(AT, R0, *L); + break; + case 0x05: //less + __ slt(AT, op1, AT); + __ bne_long(R0, AT, *L); + break; + case 0x06: //less_equal + __ slt(AT, AT, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(op1, R0, *L); + break; + case 0x02: //not_equal + __ bne_long(op1, R0, *L); + break; + case 0x03: //above + __ bne_long(R0, op1, *L); + break; + case 0x04: //above_equal + __ beq_long(R0, R0, *L); + break; + case 0x05: //below + return; + break; + case 0x06: //below_equal + __ beq_long(op1, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); 
+ ins_cost(180); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ move(AT, val); + __ beq_long(op1, AT, *L); + break; + case 0x02: //not_equal + __ move(AT, val); + __ bne_long(op1, AT, *L); + break; + case 0x03: //above + __ move(AT, val); + __ sltu(AT, AT, op1); + __ bne_long(R0, AT, *L); + break; + case 0x04: //above_equal + __ sltiu(AT, op1, val); + __ beq_long(AT, R0, *L); + break; + case 0x05: //below + __ sltiu(AT, op1, val); + __ bne_long(R0, AT, *L); + break; + case 0x06: //below_equal + __ move(AT, val); + __ sltu(AT, AT, op1); + __ beq_long(AT, R0, *L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + + +instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: // not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); +%} + +instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = R0; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + 
case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match(If cmp (CmpUL src1 zero)); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = R0; + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + case 0x04: // greater_equal + case 0x06: // less_equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: // not_equal + case 0x03: // greater + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x05: // less + __ beq_long(R0, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); +%} + +instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: //not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); +%} + +instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + Label* target = $labl$$label; + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: // equal + __ beq_long(opr1_reg, opr2_reg, *target); + break; + + case 0x02: // not_equal + __ bne_long(opr1_reg, opr2_reg, *target); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + __ beq_long(AT, R0, *target); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + __ bne_long(AT, R0, *target); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + __ beq_long(AT, R0, *target); + break; + + default: + Unimplemented(); + } + %} + 
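// All of the long-offset compare-and-branch rules above (branchConI*,
// branchConIU*, branchConL*, branchConUL*) expand the same way: slt (signed)
// or sltu (unsigned) leaves 1 in AT when its first operand is strictly less
// than its second, and each of the six cmpcode cases then reduces to a single
// beq_long/bne_long against R0; the immediate forms typically materialize the
// constant into AT first with move/set64 (the 16-bit forms can use sltiu
// directly). The C++ below is only a minimal sketch of that mapping, written
// to make the switch bodies easier to audit; emit_cmp_branch is a
// hypothetical helper, not part of this patch, and it assumes the
// MacroAssembler interface already used in the ins_encode blocks.
static void emit_cmp_branch(MacroAssembler* _masm, int cmpcode, Register op1,
                            Register op2, Label& L, bool is_signed) {
  // AT is the scratch register used throughout this file; R0 is the zero register.
  switch (cmpcode) {
  case 0x01: _masm->beq_long(op1, op2, L); break;   // equal
  case 0x02: _masm->bne_long(op1, op2, L); break;   // not_equal
  case 0x03:                                        // greater / above
    if (is_signed) _masm->slt(AT, op2, op1); else _masm->sltu(AT, op2, op1);
    _masm->bne_long(AT, R0, L);
    break;
  case 0x04:                                        // greater_equal / above_equal
    if (is_signed) _masm->slt(AT, op1, op2); else _masm->sltu(AT, op1, op2);
    _masm->beq_long(AT, R0, L);
    break;
  case 0x05:                                        // less / below
    if (is_signed) _masm->slt(AT, op1, op2); else _masm->sltu(AT, op1, op2);
    _masm->bne_long(AT, R0, L);
    break;
  case 0x06:                                        // less_equal / below_equal
    if (is_signed) _masm->slt(AT, op2, op1); else _masm->sltu(AT, op2, op1);
    _masm->beq_long(AT, R0, L);
    break;
  default:
    Unimplemented();
  }
}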
+ ins_pc_relative(1); + ins_pipe(pipe_alu_branch); +%} + +//FIXME +instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_s(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: // not_equal + __ c_eq_s(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: // greater + __ c_ule_s(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: // greater_equal + __ c_ult_s(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: // less + __ c_ult_s(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: // less_equal + __ c_ule_s(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + +instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label* L = $labl$$label; + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_d(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x02: // not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ c_eq_d(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x03: // greater + __ c_ule_d(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x04: // greater_equal + __ c_ult_d(reg_op1, reg_op2); + __ bc1f_long(*L); + break; + case 0x05: // less + __ c_ult_d(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + case 0x06: // less_equal + __ c_ule_d(reg_op1, reg_op2); + __ bc1t_long(*L); + break; + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_slow); +%} + + +// ============================================================================ +// Branch Instructions -- short offset versions + +// Jump Direct +instruct jmpDir_short(label labl) %{ + match(Goto); + effect(USE labl); + + ins_cost(300); + format %{ "JMP $labl #@jmpDir_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + if(&L) + __ b(L); + else + __ b(int(0)); + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Jump Direct Conditional - Label defines a relative address from Jcc+1 +instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ slt(AT, op2, op1); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + 
else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ slt(AT, op1, op2); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ + match(CountedLoopEnd cop (CmpI src1 src2)); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = AT; + Label &L = *($labl$$label); + int flag = $cop$$cmpcode; + + __ move(op2, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ slt(AT, op2, op1); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ slt(AT, op1, op2); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + + +// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! +instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ + match(If cop cr); + effect(USE labl); + + ins_cost(300); + format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} + + ins_encode %{ + Label &L = *($labl$$label); + switch($cop$$cmpcode) { + case 0x01: //equal + if (&L) + __ bne($cr$$Register, R0, L); + else + __ bne($cr$$Register, R0, (int)0); + break; + case 0x02: //not equal + if (&L) + __ beq($cr$$Register, R0, L); + else + __ beq($cr$$Register, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); + ins_pc_relative(1); + ins_short_branch(1); +%} + +// Conditional jumps +instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP op1 zero)); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ + match(If cmp (CmpP (DecodeN op1) zero)); + predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); + effect(USE labl); + + ins_cost(180); + format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + 
Register op2 = R0; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) + { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ + match(If cmp (CmpP op1 op2)); +// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); + effect(USE labl); + + ins_cost(200); + format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} + + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = $op2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ + match(If cmp (CmpN op1 null)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,0\t! compressed ptr\n\t" + "BP$cmp $labl @ cmpN_null_branch_short" %} + ins_encode %{ + Register op1 = $op1$$Register; + Register op2 = R0; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} +//TODO: pipe_branchP or create pipe_branchN LEE + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ + match(If cmp (CmpN op1 op2)); + effect(USE labl); + + ins_cost(180); + format %{ "CMP $op1,$op2\t! 
compressed ptr\n\t" + "BP$cmp $labl @ cmpN_reg_branch_short" %} + ins_encode %{ + Register op1_reg = $op1$$Register; + Register op2_reg = $op2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1_reg, op2_reg, L); + else + __ beq(op1_reg, op2_reg, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1_reg, op2_reg, L); + else + __ bne(op1_reg, op2_reg, (int)0); + break; + case 0x03: //above + __ sltu(AT, op2_reg, op1_reg); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1_reg, op2_reg); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1_reg, op2_reg); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, op2_reg, op1_reg); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ sltu(AT, op2, op1); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1, op2); + if(&L) + __ bne(AT, R0, L); + else + __ bne(AT, R0, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //above + __ sltu(AT, AT, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltu(AT, op1, AT); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltu(AT, op1, AT); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ sltu(AT, AT, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + 
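// The short-offset rules in this part of the file repeat the long-offset
// rules above, but use the native 16-bit-displacement branches (beq/bne,
// bgtz/bgez/bltz/blez, bc1t/bc1f) followed by the delayed()->nop() this file
// emits after every short branch, and they are marked ins_short_branch(1) so
// the compiler's branch-shortening pass can substitute them for the _long
// forms when the target is within range. The `if (&L) ... else ... (int)0`
// guard wrapping each branch appears to tolerate a null label pointer (for
// example while code size is still being estimated): with no target yet, a
// branch with displacement 0 is emitted purely to reserve the slot. The C++
// below is a minimal sketch of that shared idiom; emit_short_beq is a
// hypothetical helper, not part of this patch, and the reading of the
// null-label case is an assumption rather than something stated in the code.
static void emit_short_beq(MacroAssembler* _masm, Register op1, Register op2,
                           Label* L) {
  if (L != NULL) {
    _masm->beq(op1, op2, *L);      // target known: short pc-relative branch
  } else {
    _masm->beq(op1, op2, (int)0);  // no label yet: placeholder displacement
  }
  _masm->delayed()->nop();         // the nop this file emits after each short branch
}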
+instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Register op2 = $src2$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, op2, L); + else + __ beq(op1, op2, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, op2, L); + else + __ bne(op1, op2, (int)0); + break; + case 0x03: //above + __ slt(AT, op2, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ slt(AT, op1, op2); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ slt(AT, op1, op2); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ slt(AT, op2, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(170); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, R0, L); + else + __ beq(op1, R0, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, R0, L); + else + __ bne(op1, R0, (int)0); + break; + case 0x03: //greater + if(&L) + __ bgtz(op1, L); + else + __ bgtz(op1, (int)0); + break; + case 0x04: //greater_equal + if(&L) + __ bgez(op1, L); + else + __ bgez(op1, (int)0); + break; + case 0x05: //less + if(&L) + __ bltz(op1, L); + else + __ bltz(op1, (int)0); + break; + case 0x06: //less_equal + if(&L) + __ blez(op1, L); + else + __ blez(op1, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ + match( If cmp (CmpI src1 src2) ); + effect(USE labl); + ins_cost(200); + format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ move(AT, val); + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //greater + __ slt(AT, AT, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //greater_equal + __ slt(AT, op1, AT); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //less + __ slt(AT, op1, AT); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //less_equal + __ slt(AT, AT, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConIU_reg_immI_0_short(cmpOpU 
cmp, mRegI src1, immI_0 zero, label labl) %{ + match( If cmp (CmpU src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&L) + __ beq(op1, R0, L); + else + __ beq(op1, R0, (int)0); + break; + case 0x02: //not_equal + if (&L) + __ bne(op1, R0, L); + else + __ bne(op1, R0, (int)0); + break; + case 0x03: //above + if(&L) + __ bne(R0, op1, L); + else + __ bne(R0, op1, (int)0); + break; + case 0x04: //above_equal + if(&L) + __ beq(R0, R0, L); + else + __ beq(R0, R0, (int)0); + break; + case 0x05: //below + return; + break; + case 0x06: //below_equal + if(&L) + __ beq(op1, R0, L); + else + __ beq(op1, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ + match( If cmp (CmpU src1 src2) ); + effect(USE labl); + ins_cost(180); + format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} + + ins_encode %{ + Register op1 = $src1$$Register; + int val = $src2$$constant; + Label &L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + __ move(AT, val); + if (&L) + __ beq(op1, AT, L); + else + __ beq(op1, AT, (int)0); + break; + case 0x02: //not_equal + __ move(AT, val); + if (&L) + __ bne(op1, AT, L); + else + __ bne(op1, AT, (int)0); + break; + case 0x03: //above + __ move(AT, val); + __ sltu(AT, AT, op1); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x04: //above_equal + __ sltiu(AT, op1, val); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + case 0x05: //below + __ sltiu(AT, op1, val); + if(&L) + __ bne(R0, AT, L); + else + __ bne(R0, AT, (int)0); + break; + case 0x06: //below_equal + __ move(AT, val); + __ sltu(AT, AT, op1); + if(&L) + __ beq(AT, R0, L); + else + __ beq(AT, R0, (int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + + +instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + + break; + + case 0x06: //less_equal + 
__ slt(AT, opr2_reg, opr1_reg); + + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + + break; + + default: + Unimplemented(); + } + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ + match( If cmp (CmpUL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} + ins_cost(250); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = as_Register($src2$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x02: // not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + __ delayed()->nop(); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + __ delayed()->nop(); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + __ delayed()->nop(); + break; + + default: + Unimplemented(); + } + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match( If cmp (CmpL src1 zero) ); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, R0, target); + else + __ beq(opr1_reg, R0, int(0)); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, R0, target); + else + __ bne(opr1_reg, R0, (int)0); + break; + + case 0x03: //greater + if(&target) + __ bgtz(opr1_reg, target); + else + __ bgtz(opr1_reg, (int)0); + break; + + case 0x04: //greater_equal + if(&target) + __ bgez(opr1_reg, target); + else + __ bgez(opr1_reg, (int)0); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, R0); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x06: //less_equal + if (&target) + __ blez(opr1_reg, target); + else + __ blez(opr1_reg, int(0)); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ + match(If cmp (CmpUL src1 zero)); + effect(USE labl); + format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} + ins_cost(150); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + case 0x04: // greater_equal + case 0x06: // less_equal + if (&target) + __ beq(opr1_reg, R0, 
target); + else + __ beq(opr1_reg, R0, int(0)); + break; + + case 0x02: // not_equal + case 0x03: // greater + if(&target) + __ bne(opr1_reg, R0, target); + else + __ bne(opr1_reg, R0, (int)0); + break; + + case 0x05: // less + if(&target) + __ beq(R0, R0, target); + else + __ beq(R0, R0, (int)0); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match( If cmp (CmpL src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: //equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + + case 0x02: //not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + + case 0x03: //greater + __ slt(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x04: //greater_equal + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + case 0x05: //less + __ slt(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x06: //less_equal + __ slt(AT, opr2_reg, opr1_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + + ins_pc_relative(1); + ins_pipe( pipe_alu_branch ); + ins_short_branch(1); +%} + +instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ + match(If cmp (CmpUL src1 src2)); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} + ins_cost(180); + + ins_encode %{ + Register opr1_reg = as_Register($src1$$reg); + Register opr2_reg = AT; + Label &target = *($labl$$label); + int flag = $cmp$$cmpcode; + + __ set64(opr2_reg, $src2$$constant); + + switch(flag) { + case 0x01: // equal + if (&target) + __ beq(opr1_reg, opr2_reg, target); + else + __ beq(opr1_reg, opr2_reg, (int)0); + break; + + case 0x02: // not_equal + if(&target) + __ bne(opr1_reg, opr2_reg, target); + else + __ bne(opr1_reg, opr2_reg, (int)0); + break; + + case 0x03: // greater + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x04: // greater_equal + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + case 0x05: // less + __ sltu(AT, opr1_reg, opr2_reg); + if(&target) + __ bne(AT, R0, target); + else + __ bne(AT, R0, (int)0); + break; + + case 0x06: // less_equal + __ sltu(AT, opr2_reg, opr1_reg); + if(&target) + __ beq(AT, R0, target); + else + __ beq(AT, R0, (int)0); + break; + + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_alu_branch); + ins_short_branch(1); +%} + +//FIXME +instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ + match( If cmp (CmpF src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} + + ins_encode %{ 
+ FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label& L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_s(reg_op1, reg_op2); + if (&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x02: // not_equal + __ c_eq_s(reg_op1, reg_op2); + if (&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x03: // greater + __ c_ule_s(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x04: // greater_equal + __ c_ult_s(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x05: // less + __ c_ult_s(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x06: // less_equal + __ c_ule_s(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ + match( If cmp (CmpD src1 src2) ); + effect(USE labl); + format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} + + ins_encode %{ + FloatRegister reg_op1 = $src1$$FloatRegister; + FloatRegister reg_op2 = $src2$$FloatRegister; + Label& L = *($labl$$label); + int flag = $cmp$$cmpcode; + + switch(flag) { + case 0x01: // equal + __ c_eq_d(reg_op1, reg_op2); + if (&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x02: // not_equal + // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. + __ c_eq_d(reg_op1, reg_op2); + if (&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x03: // greater + __ c_ule_d(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x04: // greater_equal + __ c_ult_d(reg_op1, reg_op2); + if(&L) + __ bc1f(L); + else + __ bc1f((int)0); + break; + case 0x05: // less + __ c_ult_d(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + case 0x06: // less_equal + __ c_ule_d(reg_op1, reg_op2); + if(&L) + __ bc1t(L); + else + __ bc1t((int)0); + break; + default: + Unimplemented(); + } + __ delayed()->nop(); + %} + + ins_pc_relative(1); + ins_pipe(pipe_fpu_branch); + ins_short_branch(1); +%} + +// =================== End of branch instructions ========================== + +// Call Runtime Instruction +instruct CallRuntimeDirect(method meth) %{ + match(CallRuntime ); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,runtime #@CallRuntimeDirect" %} + ins_encode( Java_To_Runtime( meth ) ); + ins_pipe( pipe_slow ); + ins_alignment(16); +%} + + + +//------------------------MemBar Instructions------------------------------- +//Memory barrier flavors + +instruct membar_acquire() %{ + match(MemBarAcquire); + ins_cost(400); + + format %{ "MEMBAR-acquire @ membar_acquire" %} + ins_encode %{ + __ sync(); + %} + ins_pipe(empty); +%} + +instruct load_fence() %{ + match(LoadFence); + ins_cost(400); + + format %{ "MEMBAR @ load_fence" %} + ins_encode %{ + __ sync(); + %} + ins_pipe(pipe_slow); +%} + +instruct membar_acquire_lock() +%{ + match(MemBarAcquireLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} + ins_encode(); + ins_pipe(empty); +%} + +instruct membar_release() %{ + match(MemBarRelease); + ins_cost(400); + + format %{ "MEMBAR-release @ membar_release" 
%} + + ins_encode %{ + // Attention: DO NOT DELETE THIS GUY! + __ sync(); + %} + + ins_pipe(pipe_slow); +%} + +instruct store_fence() %{ + match(StoreFence); + ins_cost(400); + + format %{ "MEMBAR @ store_fence" %} + + ins_encode %{ + __ sync(); + %} + + ins_pipe(pipe_slow); +%} + +instruct membar_release_lock() +%{ + match(MemBarReleaseLock); + ins_cost(0); + + size(0); + format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} + ins_encode(); + ins_pipe(empty); +%} + + +instruct membar_volatile() %{ + match(MemBarVolatile); + ins_cost(400); + + format %{ "MEMBAR-volatile" %} + ins_encode %{ + if( !os::is_MP() ) return; // Not needed on single CPU + __ sync(); + + %} + ins_pipe(pipe_slow); +%} + +instruct unnecessary_membar_volatile() %{ + match(MemBarVolatile); + predicate(Matcher::post_store_load_barrier(n)); + ins_cost(0); + + size(0); + format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} + ins_encode( ); + ins_pipe(empty); +%} + +instruct membar_storestore() %{ + match(MemBarStoreStore); + + ins_cost(400); + format %{ "MEMBAR-storestore @ membar_storestore" %} + ins_encode %{ + __ sync(); + %} + ins_pipe(empty); +%} + +//----------Move Instructions-------------------------------------------------- +instruct castX2P(mRegP dst, mRegL src) %{ + match(Set dst (CastX2P src)); + format %{ "castX2P $dst, $src @ castX2P" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_cost(10); + ins_pipe( ialu_regI_mov ); +%} + +instruct castP2X(mRegL dst, mRegP src ) %{ + match(Set dst (CastP2X src)); + + format %{ "mov $dst, $src\t #@castP2X" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + if(src != dst) + __ move(dst, src); + %} + ins_pipe( ialu_regI_mov ); +%} + +instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ mfc1(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ + match(Set dst (MoveI2F src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ mtc1(src, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + __ dmfc1(dst, src); + %} + ins_pipe( pipe_slow ); +%} + +instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + ins_cost(85); + format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + Register src = as_Register($src$$reg); + + __ dmtc1(src, dst); + %} + ins_pipe( pipe_slow ); +%} + +//----------Conditional Move--------------------------------------------------- +// Conditional move +instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + 
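+  // Note on the CMove* rules in this section: they all funnel into the
+  // MacroAssembler::cmp_cmov() helper, which compares the two tmp operands
+  // under the given condition and, when it holds, moves src into dst;
+  // otherwise dst keeps its old value (dst is both input and output, hence
+  // the (Binary dst src) operand).  Roughly, a source-level shape such as
+  //   int r = (a < b) ? x : r;
+  // can match here with cop = less, tmp1 = a, tmp2 = b and src = x.
+  // The trailing bool argument is overloaded: for integer/pointer compares it
+  // selects signed (true) vs. unsigned (false) comparison, while for the
+  // floating-point flavors it appears to select single (true) vs. double
+  // (false) precision.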
format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set 
dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* 
is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = 
$dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" + "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" + %} + + ins_encode %{ + 
FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ + match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" + %} + ins_encode %{ + Register opr1 = as_Register($tmp1$$reg); + Register opr2 = as_Register($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe(pipe_slow); +%} + +instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" + "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" + %} + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, cmpOp cop ) %{ + match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" + "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" + %} + ins_encode %{ + FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); + FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ + match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" + "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" + %} + + ins_encode %{ + Register op1 = $tmp1$$Register; + Register op2 = $tmp2$$Register; + FloatRegister dst = as_FloatRegister($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + int flag = $cop$$cmpcode; + + __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); + %} + + ins_pipe( pipe_slow ); +%} + +//FIXME +instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(80); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovI_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = $tmp2$$FloatRegister; + Register dst = $dst$$Register; + Register src = $src$$Register; + int flag = $cop$$cmpcode; + + __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); + %} + ins_pipe( pipe_slow ); +%} + +instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{ + match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); + ins_cost(200); + format %{ + "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n" + "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg" + %} + + ins_encode %{ + FloatRegister reg_op1 = $tmp1$$FloatRegister; + FloatRegister reg_op2 = 
$tmp2$$FloatRegister;
+    FloatRegister dst = $dst$$FloatRegister;
+    FloatRegister src = $src$$FloatRegister;
+    int flag = $cop$$cmpcode;
+
+    __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Manifest a CmpL result in an integer register. Very painful.
+// This is the test to avoid.
+instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{
+  match(Set dst (CmpL3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %}
+  ins_encode %{
+    Register opr1 = as_Register($src1$$reg);
+    Register opr2 = as_Register($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    Label Done;
+
+    __ subu(AT, opr1, opr2);
+    __ bltz(AT, Done);
+    __ delayed()->daddiu(dst, R0, -1);
+
+    __ move(dst, 1);
+    __ movz(dst, R0, AT);
+
+    __ bind(Done);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+//
+// less_result = -1
+// greater_result = 1
+// equal_result = 0
+// nan_result = -1
+//
+instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{
+  match(Set dst (CmpF3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = as_FloatRegister($src1$$reg);
+    FloatRegister src2 = as_FloatRegister($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    Label Done;
+
+    __ c_ult_s(src1, src2);
+    __ bc1t(Done);
+    __ delayed()->daddiu(dst, R0, -1);
+
+    __ c_eq_s(src1, src2);
+    __ move(dst, 1);
+    __ movt(dst, R0);
+
+    __ bind(Done);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{
+  match(Set dst (CmpD3 src1 src2));
+  ins_cost(1000);
+  format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = as_FloatRegister($src1$$reg);
+    FloatRegister src2 = as_FloatRegister($src2$$reg);
+    Register dst = as_Register($dst$$reg);
+
+    Label Done;
+
+    __ c_ult_d(src1, src2);
+    __ bc1t(Done);
+    __ delayed()->daddiu(dst, R0, -1);
+
+    __ c_eq_d(src1, src2);
+    __ move(dst, 1);
+    __ movt(dst, R0);
+
+    __ bind(Done);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{
+  match(Set dummy (ClearArray cnt base));
+  format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
+  ins_encode %{
+    // cnt is the number of 8-byte doublewords in the array to be cleared,
+    // and base points to the starting address of the array.
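+    // In effect the loop below is (minimal C sketch of the generated code):
+    //   for (jlong* p = (jlong*)base; cnt != 0; cnt--) *p++ = 0;
+    // one sd of R0 per 8-byte word, stepping the cursor by wordSize each pass.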
+ Register base = $base$$Register; + Register num = $cnt$$Register; + Label Loop, done; + + __ beq(num, R0, done); + __ delayed()->daddu(AT, base, R0); + + __ move(T9, num); /* T9 = words */ + + __ bind(Loop); + __ sd(R0, AT, 0); + __ daddiu(T9, T9, -1); + __ bne(T9, R0, Loop); + __ delayed()->daddiu(AT, AT, wordSize); + + __ bind(done); + %} + ins_pipe( pipe_slow ); +%} + +instruct string_compare(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); + + format %{ "String Compare $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compare" %} + ins_encode %{ + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + Register str1 = $str1$$Register; + Register str2 = $str2$$Register; + Register cnt1 = $cnt1$$Register; + Register cnt2 = $cnt2$$Register; + Register result = $result$$Register; + + Label L, Loop, haveResult, done; + + // compute the and difference of lengths (in result) + __ subu(result, cnt1, cnt2); // result holds the difference of two lengths + + // compute the shorter length (in cnt1) + __ slt(AT, cnt2, cnt1); + __ movn(cnt1, cnt2, AT); + + // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register + __ bind(Loop); // Loop begin + __ beq(cnt1, R0, done); + __ delayed()->nop(); + __ lhu(AT, str1, 0); + + // compare current character + __ lhu(cnt2, str2, 0); + __ bne(AT, cnt2, haveResult); + __ delayed()->addiu(str1, str1, 2); + __ addiu(str2, str2, 2); + __ b(Loop); + __ delayed()->addiu(cnt1, cnt1, -1); // Loop end + + __ bind(haveResult); + __ subu(result, AT, cnt2); + + __ bind(done); + %} + + ins_pipe( pipe_slow ); +%} + +// intrinsic optimization +instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); + + format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} + ins_encode %{ + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + Register str1 = $str1$$Register; + Register str2 = $str2$$Register; + Register cnt = $cnt$$Register; + Register tmp = $temp$$Register; + Register result = $result$$Register; + + Label Loop, True, False; + + __ beq(str1, str2, True); // same char[] ? 
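+    // The daddiu below sits in the beq's branch delay slot: on MIPS it runs
+    // whether or not the branch is taken, so result is pre-loaded with 1
+    // (true) before we either fall through or jump to True.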
+ __ delayed()->daddiu(result, R0, 1); + + __ beq(cnt, R0, True); + __ delayed()->nop(); // count == 0 + + __ bind(Loop); + + // compare current character + __ lhu(AT, str1, 0); + __ lhu(tmp, str2, 0); + __ bne(AT, tmp, False); + __ delayed()->addiu(str1, str1, 2); + __ addiu(cnt, cnt, -1); + __ bne(cnt, R0, Loop); + __ delayed()->addiu(str2, str2, 2); + + __ b(True); + __ delayed()->nop(); + + __ bind(False); + __ daddiu(result, R0, 0); + + __ bind(True); + %} + + ins_pipe( pipe_slow ); +%} + +//----------Arithmetic Instructions------------------------------------------- +//----------Addition Instructions--------------------------------------------- +instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "addu $dst, $src1, $src2 #@addI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ addu32(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ + match(Set dst (AddI src1 src2)); + + format %{ "addu $dst, $src1, $src2 #@addI_Reg_imm" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + int imm = $src2$$constant; + + if(Assembler::is_simm16(imm)) { + __ addiu32(dst, src1, imm); + } else { + __ move(AT, imm); + __ addu32(dst, src1, AT); + } + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "daddu $dst, $src1, $src2 #@addP_reg_reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ daddu(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ + match(Set dst (AddP src1 (ConvI2L src2))); + + format %{ "daddu $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ daddu(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct addP_reg_imm(mRegP dst, mRegP src1, immL src2) %{ + match(Set dst (AddP src1 src2)); + + format %{ "daddiu $dst, $src1, $src2 #@addP_reg_imm" %} + ins_encode %{ + Register src1 = $src1$$Register; + long src2 = $src2$$constant; + Register dst = $dst$$Register; + + if(Assembler::is_simm16(src2)) { + __ daddiu(dst, src1, src2); + } else { + __ set64(AT, src2); + __ daddu(dst, src1, AT); + } + %} + ins_pipe( ialu_regI_imm16 ); +%} + +// Add Long Register with Register +instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (AddL src1 src2)); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) +%{ + match(Set dst (AddL src1 src2)); + + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + int src2_imm = $src2$$constant; + + __ daddiu(dst_reg, src1_reg, src2_imm); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) +%{ + match(Set dst (AddL (ConvI2L src1) src2)); + + 
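+  // The (ConvI2L ...) variants here and below consume the int register
+  // directly: the 32-bit ALU forms (addu32/addiu32, etc.) keep ints
+  // sign-extended to 64 bits, so no separate sign-extension step is needed
+  // before the 64-bit daddiu/daddu.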
format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + int src2_imm = $src2$$constant; + + __ daddiu(dst_reg, src1_reg, src2_imm); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ + match(Set dst (AddL (ConvI2L src1) src2)); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ + match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ + match(Set dst (AddL src1 (ConvI2L src2))); + ins_cost(200); + format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} + + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ daddu(dst_reg, src1_reg, src2_reg); + %} + + ins_pipe( ialu_regL_regL ); +%} + +//----------Subtraction Instructions------------------------------------------- +// Integer Subtraction Instructions +instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(100); + + format %{ "subu $dst, $src1, $src2 #@subI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ subu32(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ + match(Set dst (SubI src1 src2)); + ins_cost(80); + + format %{ "subu $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ addiu32(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ + match(Set dst (SubI zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negI_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ subu32(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ + match(Set dst (SubL zero src)); + ins_cost(80); + + format %{ "neg $dst, $src #@negL_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + __ subu(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ + match(Set dst (SubL src1 src2)); + ins_cost(80); + + format %{ "subu $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + __ daddiu(dst, src1, -1 * $src2$$constant); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Subtract Long Register with Register. 
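+// Note on the *_immI_M32767_32768 / *_immL_M32767_32768 rules above: MIPS has
+// no subtract-immediate, so the subtraction is folded into addiu32/daddiu with
+// the negated constant; the asymmetric range implied by the operand name
+// (presumably -32767..32768) keeps that negated value inside the signed
+// 16-bit immediate field.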
+instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{
+  match(Set dst (SubL src1 src2));
+  ins_cost(100);
+  format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %}
+  ins_encode %{
+    Register dst = as_Register($dst$$reg);
+    Register src1 = as_Register($src1$$reg);
+    Register src2 = as_Register($src2$$reg);
+
+    __ subu(dst, src1, src2);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{
+  match(Set dst (SubL src1 (ConvI2L src2)));
+  ins_cost(100);
+  format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %}
+  ins_encode %{
+    Register dst = as_Register($dst$$reg);
+    Register src1 = as_Register($src1$$reg);
+    Register src2 = as_Register($src2$$reg);
+
+    __ subu(dst, src1, src2);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{
+  match(Set dst (SubL (ConvI2L src1) src2));
+  ins_cost(200);
+  format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %}
+  ins_encode %{
+    Register dst = as_Register($dst$$reg);
+    Register src1 = as_Register($src1$$reg);
+    Register src2 = as_Register($src2$$reg);
+
+    __ subu(dst, src1, src2);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{
+  match(Set dst (SubL (ConvI2L src1) (ConvI2L src2)));
+  ins_cost(200);
+  format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %}
+  ins_encode %{
+    Register dst = as_Register($dst$$reg);
+    Register src1 = as_Register($src1$$reg);
+    Register src2 = as_Register($src2$$reg);
+
+    __ subu(dst, src1, src2);
+  %}
+  ins_pipe( ialu_regL_regL );
+%}
+
+// Integer MOD with Register
+instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{
+  match(Set dst (ModI src1 src2));
+  ins_cost(300);
+  format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %}
+  ins_encode %{
+    Register dst = $dst$$Register;
+    Register src1 = $src1$$Register;
+    Register src2 = $src2$$Register;
+
+    //if (UseLEXT1) {
+    if (0) {
+      // Experiments show that gsmod is slower than div+mfhi.
+      // So I just disable it here.
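+      // Reminder for the fallback path below: the MIPS div instruction leaves
+      // the quotient in LO and the remainder in HI, so it is the mfhi(dst)
+      // that turns this into ModI rather than DivI.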
+ __ gsmod(dst, src1, src2); + } else { + __ div(src1, src2); + __ mfhi(dst); + } + %} + + //ins_pipe( ialu_mod ); + ins_pipe( ialu_regI_regI ); +%} + +instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (ModL src1 src2)); + format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsdmod(dst, op1, op2); + } else { + __ ddiv(op1, op2); + __ mfhi(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (MulI src1 src2)); + + ins_cost(300); + format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + __ mul(dst, src1, src2); + %} + ins_pipe( ialu_mult ); +%} + +instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ + match(Set dst (AddI (MulI src1 src2) src3)); + + ins_cost(999); + format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register src3 = $src3$$Register; + Register dst = $dst$$Register; + + __ mtlo(src3); + __ madd(src1, src2); + __ mflo(dst); + %} + ins_pipe( ialu_mult ); +%} + +instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (DivI src1 src2)); + + ins_cost(300); + format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} + ins_encode %{ + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + Register dst = $dst$$Register; + + // In MIPS, div does not cause exception. + // We must trap an exception manually. + __ teq(R0, src2, 0x7); + + if (UseLEXT1) { + __ gsdiv(dst, src1, src2); + } else { + __ div(src1, src2); + + __ nop(); + __ nop(); + __ mflo(dst); + } + %} + ins_pipe( ialu_mod ); +%} + +instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ + match(Set dst (DivF src1 src2)); + + ins_cost(300); + format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + /* Here do we need to trap an exception manually ? */ + __ div_s(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ + match(Set dst (DivD src1 src2)); + + ins_cost(300); + format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} + ins_encode %{ + FloatRegister src1 = $src1$$FloatRegister; + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + + /* Here do we need to trap an exception manually ? 
*/ + __ div_d(dst, src1, src2); + %} + ins_pipe( pipe_slow ); +%} + +instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (MulL src1 src2)); + format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsdmult(dst, op1, op2); + } else { + __ dmult(op1, op2); + __ mflo(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ + match(Set dst (MulL src1 (ConvI2L src2))); + format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsdmult(dst, op1, op2); + } else { + __ dmult(op1, op2); + __ mflo(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (DivL src1 src2)); + format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} + + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register op1 = as_Register($src1$$reg); + Register op2 = as_Register($src2$$reg); + + if (UseLEXT1) { + __ gsddiv(dst, op1, op2); + } else { + __ ddiv(op1, op2); + __ mflo(dst); + } + %} + ins_pipe( pipe_slow ); +%} + +instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (AddF src1 src2)); + format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ add_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (SubF src1 src2)); + format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sub_s(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} +instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (AddD src1 src2)); + format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ add_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (SubD src1 src2)); + format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} + ins_encode %{ + FloatRegister src1 = as_FloatRegister($src1$$reg); + FloatRegister src2 = as_FloatRegister($src2$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sub_d(dst, src1, src2); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negF_reg(regF dst, regF src) %{ + match(Set dst (NegF src)); + format %{ "negF $dst, $src @negF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ neg_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct negD_reg(regD dst, regD src) %{ + match(Set dst (NegD src)); + format %{ "negD $dst, $src @negD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ neg_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +instruct 
mulF_reg_reg(regF dst, regF src1, regF src2) %{
+  match(Set dst (MulF src1 src2));
+  format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = $src1$$FloatRegister;
+    FloatRegister src2 = $src2$$FloatRegister;
+    FloatRegister dst = $dst$$FloatRegister;
+
+    __ mul_s(dst, src1, src2);
+  %}
+  ins_pipe( fpu_regF_regF );
+%}
+
+instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{
+  match(Set dst (AddF (MulF src1 src2) src3));
+  // For compatibility reasons (e.g. on the Loongson platform), this pattern is
+  // effectively disabled by giving it a prohibitive cost.
+  ins_cost(44444);
+  format %{ "maddF $dst, $src1, $src2, $src3 @maddF_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = $src1$$FloatRegister;
+    FloatRegister src2 = $src2$$FloatRegister;
+    FloatRegister src3 = $src3$$FloatRegister;
+    FloatRegister dst = $dst$$FloatRegister;
+
+    __ madd_s(dst, src1, src2, src3);
+  %}
+  ins_pipe( fpu_regF_regF );
+%}
+
+// Multiply two double-precision floating point numbers
+instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
+  match(Set dst (MulD src1 src2));
+  format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = $src1$$FloatRegister;
+    FloatRegister src2 = $src2$$FloatRegister;
+    FloatRegister dst = $dst$$FloatRegister;
+
+    __ mul_d(dst, src1, src2);
+  %}
+  ins_pipe( fpu_regF_regF );
+%}
+
+instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
+  match(Set dst (AddD (MulD src1 src2) src3));
+  // For compatibility reasons (e.g. on the Loongson platform), this pattern is
+  // effectively disabled by giving it a prohibitive cost.
+  ins_cost(44444);
+  format %{ "maddD $dst, $src1, $src2, $src3 @maddD_reg_reg" %}
+  ins_encode %{
+    FloatRegister src1 = $src1$$FloatRegister;
+    FloatRegister src2 = $src2$$FloatRegister;
+    FloatRegister src3 = $src3$$FloatRegister;
+    FloatRegister dst = $dst$$FloatRegister;
+
+    __ madd_d(dst, src1, src2, src3);
+  %}
+  ins_pipe( fpu_regF_regF );
+%}
+
+instruct absF_reg(regF dst, regF src) %{
+  match(Set dst (AbsF src));
+  ins_cost(100);
+  format %{ "absF $dst, $src @absF_reg" %}
+  ins_encode %{
+    FloatRegister src = as_FloatRegister($src$$reg);
+    FloatRegister dst = as_FloatRegister($dst$$reg);
+
+    __ abs_s(dst, src);
+  %}
+  ins_pipe( fpu_regF_regF );
+%}
+
+
+// intrinsics for math_native.
+// AbsD SqrtD CosD SinD TanD LogD Log10D + +instruct absD_reg(regD dst, regD src) %{ + match(Set dst (AbsD src)); + ins_cost(100); + format %{ "absD $dst, $src @absD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ abs_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtD_reg(regD dst, regD src) %{ + match(Set dst (SqrtD src)); + ins_cost(100); + format %{ "SqrtD $dst, $src @sqrtD_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sqrt_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct sqrtF_reg(regF dst, regF src) %{ + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + ins_cost(100); + format %{ "SqrtF $dst, $src @sqrtF_reg" %} + ins_encode %{ + FloatRegister src = as_FloatRegister($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ sqrt_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} +//----------------------------------Logical Instructions---------------------- +//__________________________________Integer Logical Instructions------------- + +//And Instuctions +// And Register with Immediate +instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ move(AT, val); + __ andr(dst, src, AT); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ + match(Set dst (AndI src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ + match(Set dst (AndI src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int size = Assembler::is_int_mask($mask$$constant); + + __ ext(dst, src, 0, size); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ + match(Set dst (AndL src1 mask)); + ins_cost(60); + + format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int size = Assembler::is_jlong_mask($mask$$constant); + + __ dext(dst, src, 0, size); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ + match(Set dst (XorI src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ + match(Set dst (XorI src1 M1)); + predicate(UseLEXT3); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ gsorn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL 
src1, immI_M1 M1) %{ + match(Set dst (XorI (ConvL2I src1) M1)); + predicate(UseLEXT3); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ gsorn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ + match(Set dst (XorL src1 src2)); + ins_cost(60); + + format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + int val = $src2$$constant; + + __ xori(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* +instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ + match(Set dst (XorL src1 M1)); + predicate(UseLEXT3); + ins_cost(60); + + format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + + __ gsorn(dst, R0, src); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI mask (LoadB mem))); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_lmask" %} + ins_encode(load_UB_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ + match(Set dst (AndI (LoadB mem) mask)); + ins_cost(60); + + format %{ "lhu $dst, $mem #@lbu_and_rmask" %} + ins_encode(load_UB_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (AndI src1 src2)); + + format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ andr(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI src1 (XorI src2 M1))); + predicate(UseLEXT3); + + format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI src1 (XorI src2 M1))); + predicate(UseLEXT3); + + format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (AndI (XorI src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ + match(Set dst (OrI (XorI src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} + +// And Long Register with Register +instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst 
(AndL src1 src2)); + format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ andr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ + match(Set dst (AndL src1 (ConvI2L src2))); + format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ andr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ + match(Set dst (AndL src1 src2)); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ + match(Set dst (ConvL2I (AndL src1 src2))); + ins_cost(60); + + format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src1$$Register; + long val = $src2$$constant; + + __ andi(dst, src, val); + %} + ins_pipe( ialu_regI_regI ); +%} + +/* +instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (AndL src1 (XorL src2 M1))); + predicate(UseLEXT3); + + format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +/* +instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (OrL src1 (XorL src2 M1))); + predicate(UseLEXT3); + + format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src1, src2); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +/* +instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (AndL (XorL src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsandn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +/* +instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ + match(Set dst (OrL (XorL src1 M1) src2)); + predicate(UseLEXT3); + + format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + __ gsorn(dst, src2, src1); + %} + ins_pipe( ialu_regI_regI ); +%} +*/ + +instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ + match(Set dst (AndL dst M8)); + ins_cost(60); + + format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 0, 3); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ + match(Set dst (AndL dst M5)); + ins_cost(60); + + format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} + ins_encode %{ + 
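+    // The andL_Reg_immL_M* rules implement AND with these particular negative
+    // constants by zeroing a bit field: dins(dst, R0, pos, size) inserts size
+    // zero bits starting at pos.  Here -5 is ~4, so clearing just bit 2
+    // (pos 2, size 1) is equivalent; likewise -8 clears bits 0..2, -7 bits
+    // 1..2, -4 bits 0..1 and -121 (= ~120) bits 3..6.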
Register dst = $dst$$Register; + + __ dins(dst, R0, 2, 1); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ + match(Set dst (AndL dst M7)); + ins_cost(60); + + format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 1, 2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ + match(Set dst (AndL dst M4)); + ins_cost(60); + + format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 0, 2); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ + match(Set dst (AndL dst M121)); + ins_cost(60); + + format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} + ins_encode %{ + Register dst = $dst$$Register; + + __ dins(dst, R0, 3, 4); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Or Long Register with Register +instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (OrL src1 src2)); + format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; + + __ orr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ + match(Set dst (OrL (CastP2X src1) src2)); + format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} + ins_encode %{ + Register dst_reg = $dst$$Register; + Register src1_reg = $src1$$Register; + Register src2_reg = $src2$$Register; + + __ orr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor Long Register with Register +instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ + match(Set dst (XorL src1 src2)); + format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} + ins_encode %{ + Register dst_reg = as_Register($dst$$reg); + Register src1_reg = as_Register($src1$$reg); + Register src2_reg = as_Register($src2$$reg); + + __ xorr(dst_reg, src1_reg, src2_reg); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left by 8-bit immediate +instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ sll(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ sll(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ + match(Set dst (AndI (LShiftI src shift) mask)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ sll(dst, src, 16); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); + + format %{ "andi $dst, $src, 7\t# @land7_2_s" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ andi(dst, src, 7); + %} 
+ ins_pipe(ialu_regI_regI); +%} + +// Shift Left by 16, followed by Arithmetic Shift Right by 16. +// This idiom is used by the compiler for the i2s bytecode. +instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) +%{ + match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); + + format %{ "i2s $dst, $src\t# @i2s" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ seh(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + +// Shift Left by 24, followed by Arithmetic Shift Right by 24. +// This idiom is used by the compiler for the i2b bytecode. +instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) +%{ + match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); + + format %{ "i2b $dst, $src\t# @i2b" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ seb(dst, src); + %} + ins_pipe(ialu_regI_regI); +%} + + +instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ + match(Set dst (LShiftI (ConvL2I src) shift)); + + format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shamt = $shift$$constant; + + __ sll(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Shift Left by variable amount in register +instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (LShiftI src shift)); + + format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shamt = $shift$$Register; + __ sllv(dst, src, shamt); + %} + ins_pipe( ialu_regI_regI ); +%} + + +// Shift Left Long +instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + if (__ is_simm(shamt, 5)) + __ dsll(dst_reg, src_reg, shamt); + else { + int sa = Assembler::low(shamt, 6); + if (sa < 32) { + __ dsll(dst_reg, src_reg, sa); + } else { + __ dsll32(dst_reg, src_reg, sa - 32); + } + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ + match(Set dst (LShiftL (ConvI2L src) shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + if (__ is_simm(shamt, 5)) + __ dsll(dst_reg, src_reg, shamt); + else { + int sa = Assembler::low(shamt, 6); + if (sa < 32) { + __ dsll(dst_reg, src_reg, sa); + } else { + __ dsll32(dst_reg, src_reg, sa - 32); + } + } + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Left Long +instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (LShiftL src shift)); + ins_cost(100); + format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ dsllv(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long +instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = ($shift$$constant & 0x3f); + 
if (__ is_simm(shamt, 5)) + __ dsra(dst_reg, src_reg, shamt); + else { + int sa = Assembler::low(shamt, 6); + if (sa < 32) { + __ dsra(dst_reg, src_reg, sa); + } else { + __ dsra32(dst_reg, src_reg, sa - 32); + } + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (RShiftL src shift))); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsra32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long arithmetically +instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (RShiftL src shift)); + ins_cost(100); + format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ dsrav(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Shift Right Long logically +instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(100); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + + __ dsrlv(dst_reg, src_reg, $shift$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ + match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); + ins_cost(80); + format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dext(dst_reg, src_reg, shamt, 31); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl(dst_reg, src_reg, shamt); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ + match(Set dst (URShiftL src shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ + match(Set dst (ConvL2I (URShiftL src shift))); + predicate(n->in(1)->in(2)->get_int() > 32); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = 
as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ + match(Set dst (URShiftL (CastP2X src) shift)); + ins_cost(80); + format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} + ins_encode %{ + Register src_reg = as_Register($src$$reg); + Register dst_reg = as_Register($dst$$reg); + int shamt = $shift$$constant; + + __ dsrl32(dst_reg, src_reg, shamt - 32); + %} + ins_pipe( ialu_regL_regL ); +%} + +// Xor Instructions +// Xor Register with Register +instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (XorI src1 src2)); + + format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ xorr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Or Instructions +instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} + ins_encode %{ + __ ori($dst$$Register, $src1$$Register, $src2$$constant); + %} + + ins_pipe( ialu_regI_regI ); +%} +// Or Register with Register +instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (OrI src1 src2)); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ + match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); + predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); + + format %{ "rotr $dst, $src, 1 ...\n\t" + "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int rshift = $rshift$$constant; + + __ rotr(dst, src, 1); + if (rshift - 1) { + __ srl(dst, dst, rshift - 1); + } + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ + match(Set dst (OrI src1 (CastP2X src2))); + + format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + __ orr(dst, src1, src2); + %} + + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right by 8-bit immediate +instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ + match(Set dst (URShiftI src shift)); + //effect(KILL cr); + + format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + + __ srl(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ + match(Set dst (AndI (URShiftI src shift) mask)); + + format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int pos = $shift$$constant; + int size = Assembler::is_int_mask($mask$$constant); + + __ ext(dst, src, pos, size); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 
lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); + + ins_cost(100); + format %{ "rotr $dst, $dst, $rshift #@rolI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + int sa = $rshift$$constant; + + __ rotr(dst, dst, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr32(dst, src, sa - 32); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); + match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ rotr(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr(dst, src, sa); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) +%{ + predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); + match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); + + ins_cost(100); + format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + int sa = $rshift$$constant; + + __ drotr32(dst, src, sa - 32); + %} + ins_pipe( ialu_regI_regI ); +%} + +// Logical Shift Right +instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (URShiftI src shift)); + + format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ srlv(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + + +instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} + ins_encode 
%{ + Register src = $src$$Register; + Register dst = $dst$$Register; + int shift = $shift$$constant; + __ sra(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ + match(Set dst (RShiftI src shift)); + // effect(KILL cr); + + format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + Register shift = $shift$$Register; + __ srav(dst, src, shift); + %} + ins_pipe( ialu_regI_regI ); +%} + +//----------Convert Int to Boolean--------------------------------------------- + +instruct convI2B(mRegI dst, mRegI src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convI2B $dst, $src @ convI2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ daddiu(dst, R0, 1); + __ movz(dst, R0, src); + } else { + __ move(AT, src); + __ daddiu(dst, R0, 1); + __ movz(dst, R0, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + +instruct convI2L_reg( mRegL dst, mRegI src) %{ + match(Set dst (ConvI2L src)); + + ins_cost(100); + format %{ "SLL $dst, $src @ convI2L_reg\t" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if(dst != src) __ sll(dst, src, 0); + %} + ins_pipe( ialu_regL_regL ); +%} + + +instruct convL2I_reg( mRegI dst, mRegL src ) %{ + match(Set dst (ConvL2I src)); + + format %{ "MOV $dst, $src @ convL2I_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ sll(dst, src, 0); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ + match(Set dst (ConvI2L (ConvL2I src))); + + format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + __ sll(dst, src, 0); + %} + + ins_pipe( ialu_regI_regI ); +%} + +instruct convL2D_reg( regD dst, mRegL src ) %{ + match(Set dst (ConvL2D src)); + format %{ "convL2D $dst, $src @ convL2D_reg" %} + ins_encode %{ + Register src = as_Register($src$$reg); + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ dmtc1(src, dst); + __ cvt_d_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convD2L_reg_fast( mRegL dst, regD src ) %{ + match(Set dst (ConvD2L src)); + ins_cost(150); + format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + Label Done; + + __ trunc_l_d(F30, src); + // max_long: 0x7fffffffffffffff + // __ set64(AT, 0x7fffffffffffffff); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(dst, F30); + + __ bne(dst, AT, Done); + __ delayed()->mtc1(R0, F30); + + __ cvt_d_w(F30, F30); + __ c_ult_d(src, F30); + __ bc1f(Done); + __ delayed()->daddiu(T9, R0, -1); + + __ c_un_d(src, src); //NaN? + __ subu(dst, T9, AT); + __ movt(dst, R0); + + __ bind(Done); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convD2L_reg_slow( mRegL dst, regD src ) %{ + match(Set dst (ConvD2L src)); + ins_cost(250); + format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister src = as_FloatRegister($src$$reg); + + Label L; + + __ c_un_d(src, src); //NaN? 
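+ // c_un_d sets the FP condition flag when src is unordered (NaN); bc1t then branches to L with dst already cleared by the move in the branch delay slot, so a NaN input converts to 0.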
+ __ bc1t(L); + __ delayed(); + __ move(dst, R0); + + __ trunc_l_d(F30, src); + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->dmfc1(dst, F30); + + __ mov_d(F12, src); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); + __ move(dst, V0); + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2I_reg_fast( mRegI dst, regF src ) %{ + match(Set dst (ConvF2I src)); + ins_cost(150); + format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} + ins_encode %{ + Register dreg = $dst$$Register; + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ trunc_w_s(F30, fval); + __ move(AT, 0x7fffffff); + __ mfc1(dreg, F30); + __ c_un_s(fval, fval); //NaN? + __ movt(dreg, R0); + + __ bne(AT, dreg, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, fval); + __ andr(AT, AT, T9); + + __ movn(dreg, T9, AT); + + __ bind(L); + + %} + + ins_pipe( pipe_slow ); +%} + + + +instruct convF2I_reg_slow( mRegI dst, regF src ) %{ + match(Set dst (ConvF2I src)); + ins_cost(250); + format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} + ins_encode %{ + Register dreg = $dst$$Register; + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ c_un_s(fval, fval); //NaN? + __ bc1t(L); + __ delayed(); + __ move(dreg, R0); + + __ trunc_w_s(F30, fval); + + /* Call SharedRuntime::f2i() to do a valid conversion */ + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->mfc1(dreg, F30); + + __ mov_s(F12, fval); + + // This bug was found when running ezDS's control-panel. + // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 + // + // An integer array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. + // V0 is corrupted during call_VM_leaf(), and should be preserved. + // + __ push(fval); + if(dreg != V0) { + __ push(V0); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); + if(dreg != V0) { + __ move(dreg, V0); + __ pop(V0); + } + __ pop(fval); + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2L_reg_fast( mRegL dst, regF src ) %{ + match(Set dst (ConvF2L src)); + ins_cost(150); + format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} + ins_encode %{ + Register dreg = $dst$$Register; + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ trunc_l_s(F30, fval); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(dreg, F30); + __ c_un_s(fval, fval); //NaN? + __ movt(dreg, R0); + + __ bne(AT, dreg, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, fval); + __ andr(AT, AT, T9); + + __ dsll32(T9, T9, 0); + __ movn(dreg, T9, AT); + + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + + +instruct convF2L_reg_slow( mRegL dst, regF src ) %{ + match(Set dst (ConvF2L src)); + ins_cost(250); + format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + FloatRegister fval = $src$$FloatRegister; + Label L; + + __ c_un_s(fval, fval); //NaN? 
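+ // Slow path: a NaN input takes the branch below and yields 0; otherwise, after trunc_l_s, the invalid-operation cause bit (0x10000) in the FCSR is checked and out-of-range values fall back to SharedRuntime::f2l.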
+ __ bc1t(L); + __ delayed(); + __ move(dst, R0); + + __ trunc_l_s(F30, fval); + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->dmfc1(dst, F30); + + __ mov_s(F12, fval); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); + __ move(dst, V0); + __ bind(L); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convL2F_reg( regF dst, mRegL src ) %{ + match(Set dst (ConvL2F src)); + format %{ "convl2f $dst, $src @ convL2F_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + Register src = as_Register($src$$reg); + Label L; + + __ dmtc1(src, dst); + __ cvt_s_l(dst, dst); + %} + + ins_pipe( pipe_slow ); +%} + +instruct convI2F_reg( regF dst, mRegI src ) %{ + match(Set dst (ConvI2F src)); + format %{ "convi2f $dst, $src @ convI2F_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + + __ mtc1(src, dst); + __ cvt_s_w(dst, dst); + %} + + ins_pipe( fpu_regF_regF ); +%} + +instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ + match(Set dst (CmpLTMask p zero)); + ins_cost(100); + + format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} + ins_encode %{ + Register src = $p$$Register; + Register dst = $dst$$Register; + + __ sra(dst, src, 31); + %} + ins_pipe( pipe_slow ); +%} + + +instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ + match(Set dst (CmpLTMask p q)); + ins_cost(400); + + format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} + ins_encode %{ + Register p = $p$$Register; + Register q = $q$$Register; + Register dst = $dst$$Register; + + __ slt(dst, p, q); + __ subu(dst, R0, dst); + %} + ins_pipe( pipe_slow ); +%} + +instruct convP2B(mRegI dst, mRegP src) %{ + match(Set dst (Conv2B src)); + + ins_cost(100); + format %{ "convP2B $dst, $src @ convP2B" %} + ins_encode %{ + Register dst = as_Register($dst$$reg); + Register src = as_Register($src$$reg); + + if (dst != src) { + __ daddiu(dst, R0, 1); + __ movz(dst, R0, src); + } else { + __ move(AT, src); + __ daddiu(dst, R0, 1); + __ movz(dst, R0, AT); + } + %} + + ins_pipe( ialu_regL_regL ); +%} + + +instruct convI2D_reg_reg(regD dst, mRegI src) %{ + match(Set dst (ConvI2D src)); + format %{ "conI2D $dst, $src @convI2D_reg" %} + ins_encode %{ + Register src = $src$$Register; + FloatRegister dst = $dst$$FloatRegister; + __ mtc1(src, dst); + __ cvt_d_w(dst, dst); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convF2D_reg_reg(regD dst, regF src) %{ + match(Set dst (ConvF2D src)); + format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ cvt_d_s(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct convD2F_reg_reg(regF dst, regD src) %{ + match(Set dst (ConvD2F src)); + format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + + __ cvt_s_d(dst, src); + %} + ins_pipe( fpu_regF_regF ); +%} + + +// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
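+// The fast variant truncates inline and repairs the overflow/NaN results with compares and branches; the slow variant checks the FCSR invalid-operation cause bit and calls SharedRuntime::d2i for out-of-range inputs.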
+instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ + match(Set dst (ConvD2I src)); + + ins_cost(150); + format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} + + ins_encode %{ + FloatRegister src = $src$$FloatRegister; + Register dst = $dst$$Register; + + Label Done; + + __ trunc_w_d(F30, src); + // max_int: 2147483647 + __ move(AT, 0x7fffffff); + __ mfc1(dst, F30); + + __ bne(dst, AT, Done); + __ delayed()->mtc1(R0, F30); + + __ cvt_d_w(F30, F30); + __ c_ult_d(src, F30); + __ bc1f(Done); + __ delayed()->addiu(T9, R0, -1); + + __ c_un_d(src, src); //NaN? + __ subu32(dst, T9, AT); + __ movt(dst, R0); + + __ bind(Done); + %} + ins_pipe( pipe_slow ); +%} + + +instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ + match(Set dst (ConvD2I src)); + + ins_cost(250); + format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} + + ins_encode %{ + FloatRegister src = $src$$FloatRegister; + Register dst = $dst$$Register; + Label L; + + __ trunc_w_d(F30, src); + __ cfc1(AT, 31); + __ li(T9, 0x10000); + __ andr(AT, AT, T9); + __ beq(AT, R0, L); + __ delayed()->mfc1(dst, F30); + + __ mov_d(F12, src); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); + __ move(dst, V0); + __ bind(L); + + %} + ins_pipe( pipe_slow ); +%} + +// Convert oop pointer into compressed form +instruct encodeHeapOop(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop $dst,$src" %} + ins_encode %{ + Register src = $src$$Register; + Register dst = $dst$$Register; + + __ encode_heap_oop(dst, src); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ + predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); + match(Set dst (EncodeP src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} + ins_encode %{ + __ encode_heap_oop_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && + n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + + __ decode_heap_oop(d, s); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ + predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || + n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); + match(Set dst (DecodeN src)); + format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_heap_oop_not_null(d, s); + } else { + __ decode_heap_oop_not_null(d); + } + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ + match(Set dst (EncodePKlass src)); + format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} + ins_encode %{ + __ encode_klass_not_null($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ + match(Set dst (DecodeNKlass src)); + format %{ "decode_heap_klass_not_null $dst,$src" %} + ins_encode %{ + Register s = $src$$Register; + Register d = $dst$$Register; + if (s != d) { + __ decode_klass_not_null(d, s); + } else { + __ decode_klass_not_null(d); + } + 
%} + ins_pipe( ialu_regL_regL ); +%} + +//FIXME +instruct tlsLoadP(mRegP dst) %{ + match(Set dst (ThreadLocal)); + + ins_cost(0); + format %{ " get_thread in $dst #@tlsLoadP" %} + ins_encode %{ + Register dst = $dst$$Register; +#ifdef OPT_THREAD + __ move(dst, TREG); +#else + __ get_thread(dst); +#endif + %} + + ins_pipe( ialu_loadI ); +%} + + +instruct checkCastPP( mRegP dst ) %{ + match(Set dst (CheckCastPP dst)); + + format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} + ins_encode( /*empty encoding*/ ); + ins_pipe( empty ); +%} + +instruct castPP(mRegP dst) +%{ + match(Set dst (CastPP dst)); + + size(0); + format %{ "# castPP of $dst" %} + ins_encode(/* empty encoding */); + ins_pipe(empty); +%} + +instruct castII( mRegI dst ) %{ + match(Set dst (CastII dst)); + format %{ "#castII of $dst empty encoding" %} + ins_encode( /*empty encoding*/ ); + ins_cost(0); + ins_pipe( empty ); +%} + +// Return Instruction +// Remove the return address & jump to it. +instruct Ret() %{ + match(Return); + format %{ "RET #@Ret" %} + + ins_encode %{ + __ jr(RA); + __ delayed()->nop(); + %} + + ins_pipe( pipe_jump ); +%} + +/* +// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. +instruct jumpXtnd(mRegL switch_val) %{ + match(Jump switch_val); + + ins_cost(350); + + format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" + "jr T9\n\t" + "nop" %} + ins_encode %{ + Register table_base = $constanttablebase; + int con_offset = $constantoffset; + Register switch_reg = $switch_val$$Register; + + if (UseLEXT1) { + if (Assembler::is_simm(con_offset, 8)) { + __ gsldx(T9, table_base, switch_reg, con_offset); + } else if (Assembler::is_simm16(con_offset)) { + __ daddu(T9, table_base, switch_reg); + __ ld(T9, T9, con_offset); + } else { + __ move(T9, con_offset); + __ daddu(AT, table_base, switch_reg); + __ gsldx(T9, AT, T9, 0); + } + } else { + if (Assembler::is_simm16(con_offset)) { + __ daddu(T9, table_base, switch_reg); + __ ld(T9, T9, con_offset); + } else { + __ move(T9, con_offset); + __ daddu(AT, table_base, switch_reg); + __ daddu(AT, T9, AT); + __ ld(T9, AT, 0); + } + } + + __ jr(T9); + __ delayed()->nop(); + + %} + ins_pipe(pipe_jump); +%} +*/ + + +// Tail Jump; remove the return address; jump to target. +// TailCall above leaves the return address around. +// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). +// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a +// "restore" before this instruction (in Epilogue), we need to materialize it +// in %i0. +//FIXME +instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ + match( TailJump jump_target ex_oop ); + ins_cost(200); + format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} + ins_encode %{ + Register target = $jump_target$$Register; + + // V0, V1 are indicated in: + // [stubGenerator_mips.cpp] generate_forward_exception() + // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() + // + Register oop = $ex_oop$$Register; + Register exception_oop = V0; + Register exception_pc = V1; + + __ move(exception_pc, RA); + __ move(exception_oop, oop); + + __ jr(target); + __ delayed()->nop(); + %} + ins_pipe( pipe_jump ); +%} + +// ============================================================================ +// Procedure Call/Return Instructions +// Call Java Static Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. 
+instruct CallStaticJavaDirect(method meth) %{ + match(CallStaticJava); + effect(USE meth); + + ins_cost(300); + format %{ "CALL,static #@CallStaticJavaDirect " %} + ins_encode( Java_Static_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +// Call Java Dynamic Instruction +// Note: If this code changes, the corresponding ret_addr_offset() and +// compute_padding() functions will have to be adjusted. +instruct CallDynamicJavaDirect(method meth) %{ + match(CallDynamicJava); + effect(USE meth); + + ins_cost(300); + format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" + "CallDynamic @ CallDynamicJavaDirect" %} + ins_encode( Java_Dynamic_Call( meth ) ); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +instruct CallLeafNoFPDirect(method meth) %{ + match(CallLeafNoFP); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF_NOFP,runtime " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +// Prefetch instructions. + +instruct prefetchrNTA( memory mem ) %{ + match(PrefetchRead mem); + ins_cost(125); + + format %{ "pref $mem\t# Prefetch into non-temporal cache for read @ prefetchrNTA" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + } else { + __ move(AT, as_Register(base)); + } + if( Assembler::is_simm16(disp) ) { + __ daddiu(AT, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + } + __ pref(0, AT, 0); //hint: 0:load + %} + ins_pipe(pipe_slow); +%} + +instruct prefetchwNTA( memory mem ) %{ + match(PrefetchWrite mem); + ins_cost(125); + format %{ "pref $mem\t# Prefetch to non-temporal cache for write @ prefetchwNTA" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, as_Register(base), AT); + } + } else { + __ move(AT, as_Register(base)); + } + if( Assembler::is_simm16(disp) ) { + __ daddiu(AT, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + } + __ pref(1, AT, 0); //hint: 1:store + %} + ins_pipe(pipe_slow); +%} + +// Prefetch instructions for allocation. 
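+// On this port the allocation prefetch is implemented as a discarded byte load (lb, or gslbx when UseLEXT1 is enabled) into R0 rather than a pref hint.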
+ +instruct prefetchAllocNTA( memory mem ) %{ + match(PrefetchAllocation mem); + ins_cost(125); + format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + Register dst = R0; + + if ( index != 0 ) { + if ( Assembler::is_simm16(disp) ) { + if (UseLEXT1) { + if (scale == 0) { + __ gslbx(dst, as_Register(base), as_Register(index), disp); + } else { + __ dsll(AT, as_Register(index), scale); + __ gslbx(dst, as_Register(base), AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ lb(dst, AT, disp); + } + } else { + if (scale == 0) { + __ addu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(AT, as_Register(index), scale); + __ addu(AT, as_Register(base), AT); + } + __ move(T9, disp); + if (UseLEXT1) { + __ gslbx(dst, AT, T9, 0); + } else { + __ addu(AT, AT, T9); + __ lb(dst, AT, 0); + } + } + } else { + if ( Assembler::is_simm16(disp) ) { + __ lb(dst, as_Register(base), disp); + } else { + __ move(T9, disp); + if (UseLEXT1) { + __ gslbx(dst, as_Register(base), T9, 0); + } else { + __ addu(AT, as_Register(base), T9); + __ lb(dst, AT, 0); + } + } + } + %} + ins_pipe(pipe_slow); +%} + + +// Call runtime without safepoint +instruct CallLeafDirect(method meth) %{ + match(CallLeaf); + effect(USE meth); + + ins_cost(300); + format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} + ins_encode(Java_To_Runtime(meth)); + ins_pipe( pipe_slow ); + ins_pc_relative(1); + ins_alignment(16); +%} + +// Load Char (16bit unsigned) +instruct loadUS(mRegI dst, memory mem) %{ + match(Set dst (LoadUS mem)); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadC" %} + ins_encode(load_C_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct loadUS_convI2L(mRegL dst, memory mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); + + ins_cost(125); + format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} + ins_encode(load_C_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Store Char (16bit unsigned) +instruct storeC(memory mem, mRegI src) %{ + match(Set mem (StoreC mem src)); + + ins_cost(125); + format %{ "storeC $src, $mem @ storeC" %} + ins_encode(store_C_reg_enc(mem, src)); + ins_pipe( ialu_loadI ); +%} + +instruct storeC_0(memory mem, immI_0 zero) %{ + match(Set mem (StoreC mem zero)); + + ins_cost(125); + format %{ "storeC $zero, $mem @ storeC_0" %} + ins_encode(store_C0_enc(mem)); + ins_pipe( ialu_loadI ); +%} + + +instruct loadConF_immF_0(regF dst, immF_0 zero) %{ + match(Set dst zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConF_immF_0\n"%} + ins_encode %{ + FloatRegister dst = $dst$$FloatRegister; + + __ mtc1(R0, dst); + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConF(regF dst, immF src) %{ + match(Set dst src); + ins_cost(125); + + format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm16(con_offset)) { + __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ set64(AT, con_offset); + if (UseLEXT1) { + __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); + } else { + __ daddu(AT, $constanttablebase, AT); + __ lwc1($dst$$FloatRegister, AT, 0); + } + } + %} + ins_pipe( fpu_loadF ); +%} + + +instruct loadConD_immD_0(regD dst, immD_0 zero) %{ + match(Set dst 
zero); + ins_cost(100); + + format %{ "mov $dst, zero @ loadConD_immD_0"%} + ins_encode %{ + FloatRegister dst = as_FloatRegister($dst$$reg); + + __ dmtc1(R0, dst); + %} + ins_pipe( fpu_loadF ); +%} + +instruct loadConD(regD dst, immD src) %{ + match(Set dst src); + ins_cost(125); + + format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} + ins_encode %{ + int con_offset = $constantoffset($src); + + if (Assembler::is_simm16(con_offset)) { + __ ldc1($dst$$FloatRegister, $constanttablebase, con_offset); + } else { + __ set64(AT, con_offset); + if (UseLEXT1) { + __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); + } else { + __ daddu(AT, $constanttablebase, AT); + __ ldc1($dst$$FloatRegister, AT, 0); + } + } + %} + ins_pipe( fpu_loadF ); +%} + +// Store register Float value (it is faster than store from FPU register) +instruct storeF_reg( memory mem, regF src) %{ + match(Set mem (StoreF mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeF_reg" %} + ins_encode(store_F_reg_enc(mem, src)); + ins_pipe( fpu_storeF ); +%} + +instruct storeF_immF_0( memory mem, immF_0 zero) %{ + match(Set mem (StoreF mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if ( scale == 0 ) { + __ gsswx(R0, as_Register(base), as_Register(index), disp); + } else { + __ dsll(T9, as_Register(index), scale); + __ gsswx(R0, as_Register(base), T9, disp); + } + } else if ( Assembler::is_simm16(disp) ) { + if ( scale == 0 ) { + __ daddu(AT, as_Register(base), as_Register(index)); + } else { + __ dsll(T9, as_Register(index), scale); + __ daddu(AT, as_Register(base), T9); + } + __ sw(R0, AT, disp); + } else { + if ( scale == 0 ) { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + __ gsswx(R0, as_Register(base), AT, 0); + } else { + __ dsll(T9, as_Register(index), scale); + __ move(AT, disp); + __ daddu(AT, AT, T9); + __ gsswx(R0, as_Register(base), AT, 0); + } + } + } else { //not use loongson isa + if(scale != 0) { + __ dsll(T9, as_Register(index), scale); + __ daddu(AT, as_Register(base), T9); + } else { + __ daddu(AT, as_Register(base), as_Register(index)); + } + if( Assembler::is_simm16(disp) ) { + __ sw(R0, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sw(R0, AT, 0); + } + } + } else { //index is 0 + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + __ sw(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + __ gsswx(R0, as_Register(base), T9, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sw(R0, as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sw(R0, AT, 0); + } + } + } + %} + ins_pipe( ialu_storeI ); +%} + +// Load Double +instruct loadD(regD dst, memory mem) %{ + match(Set dst (LoadD mem)); + + ins_cost(150); + format %{ "loadD $dst, $mem #@loadD" %} + ins_encode(load_D_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +// Load Double - UNaligned +instruct loadD_unaligned(regD dst, memory mem ) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(250); + // FIXME: Need more effective ldl/ldr + format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} + ins_encode(load_D_enc(dst, mem)); + ins_pipe( ialu_loadI ); +%} + +instruct storeD_reg( memory mem, regD src) %{ + match(Set 
mem (StoreD mem src)); + + ins_cost(50); + format %{ "store $mem, $src\t# store float @ storeD_reg" %} + ins_encode(store_D_reg_enc(mem, src)); + ins_pipe( fpu_storeF ); +%} + +instruct storeD_immD_0( memory mem, immD_0 zero) %{ + match(Set mem (StoreD mem zero)); + + ins_cost(40); + format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + __ mtc1(R0, F30); + __ cvt_d_w(F30, F30); + + if( index != 0 ) { + if (UseLEXT1) { + if ( Assembler::is_simm(disp, 8) ) { + if (scale == 0) { + __ gssdxc1(F30, as_Register(base), as_Register(index), disp); + } else { + __ dsll(T9, as_Register(index), scale); + __ gssdxc1(F30, as_Register(base), T9, disp); + } + } else if ( Assembler::is_simm16(disp) ) { + if (scale == 0) { + __ daddu(AT, as_Register(base), as_Register(index)); + __ sdc1(F30, AT, disp); + } else { + __ dsll(T9, as_Register(index), scale); + __ daddu(AT, as_Register(base), T9); + __ sdc1(F30, AT, disp); + } + } else { + if (scale == 0) { + __ move(T9, disp); + __ daddu(AT, as_Register(index), T9); + __ gssdxc1(F30, as_Register(base), AT, 0); + } else { + __ move(T9, disp); + __ dsll(AT, as_Register(index), scale); + __ daddu(AT, AT, T9); + __ gssdxc1(F30, as_Register(base), AT, 0); + } + } + } else { // not use loongson isa + if(scale != 0) { + __ dsll(T9, as_Register(index), scale); + __ daddu(AT, as_Register(base), T9); + } else { + __ daddu(AT, as_Register(base), as_Register(index)); + } + if( Assembler::is_simm16(disp) ) { + __ sdc1(F30, AT, disp); + } else { + __ move(T9, disp); + __ daddu(AT, AT, T9); + __ sdc1(F30, AT, 0); + } + } + } else {// index is 0 + if (UseLEXT1) { + if ( Assembler::is_simm16(disp) ) { + __ sdc1(F30, as_Register(base), disp); + } else { + __ move(T9, disp); + __ gssdxc1(F30, as_Register(base), T9, 0); + } + } else { + if( Assembler::is_simm16(disp) ) { + __ sdc1(F30, as_Register(base), disp); + } else { + __ move(T9, disp); + __ daddu(AT, as_Register(base), T9); + __ sdc1(F30, AT, 0); + } + } + } + %} + ins_pipe( ialu_storeI ); +%} + +instruct loadSSI(mRegI dst, stackSlotI src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "lw $dst, $src\t# int stk @ loadSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); + __ lw($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSI(stackSlotI dst, mRegI src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sw $dst, $src\t# int stk @ storeSSI" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); + __ sw($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSL(mRegL dst, stackSlotL src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld $dst, $src\t# long stk @ loadSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); + __ ld($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSL(stackSlotL dst, mRegL src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sd $dst, $src\t# long stk @ storeSSL" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); + __ sd($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSP(mRegP dst, stackSlotP src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} + ins_encode %{ + guarantee( 
Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); + __ ld($dst$$Register, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSP(stackSlotP dst, mRegP src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); + __ sd($src$$Register, SP, $dst$$disp); + %} + ins_pipe(ialu_storeI); +%} + +instruct loadSSF(regF dst, stackSlotF src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); + __ lwc1($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSF(stackSlotF dst, regF src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); + __ swc1($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +// Use the same format since predicate() can not be used here. +instruct loadSSD(regD dst, stackSlotD src) +%{ + match(Set dst src); + + ins_cost(125); + format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); + __ ldc1($dst$$FloatRegister, SP, $src$$disp); + %} + ins_pipe(ialu_loadI); +%} + +instruct storeSSD(stackSlotD dst, regD src) +%{ + match(Set dst src); + + ins_cost(100); + format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); + __ sdc1($src$$FloatRegister, SP, $dst$$disp); + %} + ins_pipe(fpu_storeF); +%} + +instruct cmpFastLock( FlagsReg cr, mRegP object, s0_RegP box, mRegI tmp, mRegP scr) %{ + match( Set cr (FastLock object box) ); + effect( TEMP tmp, TEMP scr, USE_KILL box ); + ins_cost(300); + format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, $scr$$Register); + __ move($cr$$Register, AT); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +instruct cmpFastUnlock( FlagsReg cr, mRegP object, s0_RegP box, mRegP tmp ) %{ + match( Set cr (FastUnlock object box) ); + effect( TEMP tmp, USE_KILL box ); + ins_cost(300); + format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register); + __ move($cr$$Register, AT); + %} + + ins_pipe( pipe_slow ); + ins_pc_relative(1); +%} + +// Store CMS card-mark Immediate +instruct storeImmCM(memory mem, immI8 src) %{ + match(Set mem (StoreCM mem src)); + + ins_cost(150); + format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} +// opcode(0xC6); + ins_encode(store_B_immI_enc_sync(mem, src)); + ins_pipe( ialu_storeI ); +%} + +// Die now +instruct ShouldNotReachHere( ) +%{ + match(Halt); + ins_cost(300); + + // Use the following format syntax + format %{ "ILLTRAP ;#@ShouldNotReachHere" %} + ins_encode %{ + // Here we should emit illtrap ! 
+ + __ stop("in ShoudNotReachHere"); + + %} + ins_pipe( pipe_jump ); +%} + +instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) +%{ + predicate(Universe::narrow_oop_shift() == 0); + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + int disp = $mem$$disp; + + __ daddiu(dst, base, disp); + %} + ins_pipe( ialu_regI_imm16 ); +%} + +instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + Register index = as_Register($mem$$index); + int scale = $mem$$scale; + int disp = $mem$$disp; + + if (scale == 0) { + __ daddu(AT, base, index); + __ daddiu(dst, AT, disp); + } else { + __ dsll(AT, index, scale); + __ daddu(AT, base, AT); + __ daddiu(dst, AT, disp); + } + %} + + ins_pipe( ialu_regI_imm16 ); +%} + +instruct leaPIdxScale(mRegP dst, indIndexScale mem) +%{ + match(Set dst mem); + + ins_cost(110); + format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} + ins_encode %{ + Register dst = $dst$$Register; + Register base = as_Register($mem$$base); + Register index = as_Register($mem$$index); + int scale = $mem$$scale; + + if (scale == 0) { + __ daddu(dst, base, index); + } else { + __ dsll(AT, index, scale); + __ daddu(dst, base, AT); + } + %} + + ins_pipe( ialu_regI_imm16 ); +%} + + +// ============================================================================ +// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass +// array for an instance of the superklass. Set a hidden internal cache on a +// hit (cache is checked with exposed code in gen_subtype_check()). Return +// NZ for a miss or zero for a hit. The encoding ALSO sets flags. +instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ + match(Set result (PartialSubtypeCheck sub super)); + effect(KILL tmp); + ins_cost(1100); // slightly larger than the next version + format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} + + ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); + ins_pipe( pipe_slow ); +%} + +// Conditional-store of the updated heap-top. +// Used during allocation of the shared heap. + +instruct storePConditional( memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr ) %{ + match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); + + format %{ "CMPXCHG $heap_top_ptr, $newval\t# (ptr) @storePConditional " + "If $oldval == $heap_top_ptr then store $newval into $heap_top_ptr" %} + ins_encode%{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); + + int index = $heap_top_ptr$$index; + int scale = $heap_top_ptr$$scale; + int disp = $heap_top_ptr$$disp; + + guarantee(Assembler::is_simm16(disp), ""); + + if( index != 0 ) { + __ stop("in storePConditional: index != 0"); + } else { + __ cmpxchg(newval, addr, oldval); + __ move($cr$$Register, AT); + } + %} + ins_pipe( long_memory_op ); +%} + +// Conditional-store of an int value. +// AT flag is set on success, reset otherwise. 
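+// Implemented below with an ll/sc retry loop: AT ends up as 0xFF when the store succeeds and as 0 when the loaded value differs from $oldval.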
+instruct storeIConditional( memory mem, mRegI oldval, mRegI newval, FlagsReg cr ) %{ + match(Set cr (StoreIConditional mem (Binary oldval newval))); +// effect(KILL oldval); + format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} + + ins_encode %{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Address addr(as_Register($mem$$base), $mem$$disp); + Label again, failure; + + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + guarantee(Assembler::is_simm16(disp), ""); + + if( index != 0 ) { + __ stop("in storeIConditional: index != 0"); + } else { + __ bind(again); + if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) __ sync(); + __ ll(AT, addr); + __ bne(AT, oldval, failure); + __ delayed()->addu(AT, R0, R0); + + __ addu(AT, newval, R0); + __ sc(AT, addr); + __ beq(AT, R0, again); + __ delayed()->addiu(AT, R0, 0xFF); + __ bind(failure); + __ sync(); + + __ move($cr$$Register, AT); + } +%} + + ins_pipe( long_memory_op ); +%} + +// Conditional-store of a long value. +// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. +instruct storeLConditional(memory mem, t2RegL oldval, mRegL newval, FlagsReg cr ) +%{ + match(Set cr (StoreLConditional mem (Binary oldval newval))); + effect(KILL oldval); + + format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} + ins_encode%{ + Register oldval = $oldval$$Register; + Register newval = $newval$$Register; + Address addr(as_Register($mem$$base), $mem$$disp); + + int index = $mem$$index; + int scale = $mem$$scale; + int disp = $mem$$disp; + + guarantee(Assembler::is_simm16(disp), ""); + + if( index != 0 ) { + __ stop("in storeIConditional: index != 0"); + } else { + __ cmpxchg(newval, addr, oldval); + __ move($cr$$Register, AT); + } + %} + ins_pipe( long_memory_op ); +%} + +// Implement LoadPLocked. Must be ordered against changes of the memory location +// by storePConditional. 
+instruct loadPLocked(mRegP dst, memory mem) %{ + match(Set dst (LoadPLocked mem)); + ins_cost(MEMORY_REF_COST); + + format %{ "ld $dst, $mem #@loadPLocked\n\t" + "sync" %} + size(12); + ins_encode (load_P_enc_ac(dst, mem)); + ins_pipe( ialu_loadI ); +%} + + +instruct compareAndSwapI( mRegI res, mRegP mem_ptr, mS2RegI oldval, mRegI newval) %{ + match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); + effect(KILL oldval); +// match(CompareAndSwapI mem_ptr (Binary oldval newval)); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL\n\t" + "MOV $res, 1 @ compareAndSwapI\n\t" + "BNE AT, R0 @ compareAndSwapI\n\t" + "MOV $res, 0 @ compareAndSwapI\n" + "L:" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + Label L; + + __ cmpxchg32(newval, addr, oldval); + __ move(res, AT); + %} + ins_pipe( long_memory_op ); +%} + +instruct compareAndSwapL( mRegI res, mRegP mem_ptr, s2RegL oldval, mRegL newval) %{ + predicate(VM_Version::supports_cx8()); + match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); + effect(KILL oldval); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI\n\t" + "MOV $res, 1 @ compareAndSwapI\n\t" + "BNE AT, R0 @ compareAndSwapI\n\t" + "MOV $res, 0 @ compareAndSwapI\n" + "L:" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + Label L; + + __ cmpxchg(newval, addr, oldval); + __ move(res, AT); + %} + ins_pipe( long_memory_op ); +%} + +//FIXME: +instruct compareAndSwapP( mRegI res, mRegP mem_ptr, s2_RegP oldval, mRegP newval) %{ + match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); + effect(KILL oldval); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP\n\t" + "MOV $res, AT @ compareAndSwapP\n\t" + "L:" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + Label L; + + __ cmpxchg(newval, addr, oldval); + __ move(res, AT); + %} + ins_pipe( long_memory_op ); +%} + +instruct compareAndSwapN( mRegI res, mRegP mem_ptr, t2_RegN oldval, mRegN newval) %{ + match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); + effect(KILL oldval); + format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN\n\t" + "MOV $res, AT @ compareAndSwapN\n\t" + "L:" %} + ins_encode %{ + Register newval = $newval$$Register; + Register oldval = $oldval$$Register; + Register res = $res$$Register; + Address addr($mem_ptr$$Register, 0); + Label L; + + // cmpxchg32 is implemented with ll/sc, which will do sign extension. + // Thus, we should extend oldval's sign for correct comparision. + // + __ sll(oldval, oldval, 0); + + __ cmpxchg32(newval, addr, oldval); + __ move(res, AT); + %} + ins_pipe( long_memory_op ); +%} + +//----------Max and Min-------------------------------------------------------- +// Min Instructions +//// +// *** Min and Max using the conditional move are slower than the +// *** branch version on a Pentium III. +// // Conditional move for min +//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ +// effect( USE_DEF op2, USE op1, USE cr ); +// format %{ "CMOVlt $op2,$op1\t! 
min" %} +// opcode(0x4C,0x0F); +// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); +// ins_pipe( pipe_cmov_reg ); +//%} +// +//// Min Register with Register (P6 version) +//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ +// predicate(VM_Version::supports_cmov() ); +// match(Set op2 (MinI op1 op2)); +// ins_cost(200); +// expand %{ +// eFlagsReg cr; +// compI_eReg(cr,op1,op2); +// cmovI_reg_lt(op2,op1,cr); +// %} +//%} + +// Min Register with Register (generic version) +instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MinI dst src)); + //effect(KILL flags); + ins_cost(80); + + format %{ "MIN $dst, $src @minI_Reg_Reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, src, dst); + __ movn(dst, src, AT); + + %} + + ins_pipe( pipe_slow ); +%} + +// Max Register with Register +// *** Min and Max using the conditional move are slower than the +// *** branch version on a Pentium III. +// // Conditional move for max +//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ +// effect( USE_DEF op2, USE op1, USE cr ); +// format %{ "CMOVgt $op2,$op1\t! max" %} +// opcode(0x4F,0x0F); +// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); +// ins_pipe( pipe_cmov_reg ); +//%} +// +// // Max Register with Register (P6 version) +//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ +// predicate(VM_Version::supports_cmov() ); +// match(Set op2 (MaxI op1 op2)); +// ins_cost(200); +// expand %{ +// eFlagsReg cr; +// compI_eReg(cr,op1,op2); +// cmovI_reg_gt(op2,op1,cr); +// %} +//%} + +// Max Register with Register (generic version) +instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MaxI dst src)); + ins_cost(80); + + format %{ "MAX $dst, $src @maxI_Reg_Reg" %} + + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ slt(AT, dst, src); + __ movn(dst, src, AT); + + %} + + ins_pipe( pipe_slow ); +%} + +instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ + match(Set dst (MaxI dst zero)); + ins_cost(50); + + format %{ "MAX $dst, 0 @maxI_Reg_zero" %} + + ins_encode %{ + Register dst = $dst$$Register; + + __ slt(AT, dst, R0); + __ movn(dst, R0, AT); + + %} + + ins_pipe( pipe_slow ); +%} + +instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL src mask)); + + format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ dext(dst, src, 0, 32); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) +%{ + match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); + + format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src1 = $src1$$Register; + Register src2 = $src2$$Register; + + if (src1 == dst) { + __ dinsu(dst, src2, 32, 32); + } else if (src2 == dst) { + __ dsll32(dst, dst, 0); + __ dins(dst, src1, 0, 32); + } else { + __ dext(dst, src1, 0, 32); + __ dinsu(dst, src2, 32, 32); + } + %} + ins_pipe(ialu_regI_regI); +%} + +// Zero-extend convert int to long +instruct convI2L_reg_reg_zex(mRegL dst, mRegI src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L src) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ dext(dst, src, 0, 32); + %} + ins_pipe(ialu_regI_regI); +%} + +instruct 
convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) +%{ + match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); + + format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} + ins_encode %{ + Register dst = $dst$$Register; + Register src = $src$$Register; + + __ dext(dst, src, 0, 32); + %} + ins_pipe(ialu_regI_regI); +%} + +// Match loading integer and casting it to unsigned int in long register. +// LoadI + ConvI2L + AndL 0xffffffff. +instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + + format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + +instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ + match(Set dst (AndL mask (ConvI2L (LoadI mem)))); + + format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} + ins_encode (load_N_enc(dst, mem)); + ins_pipe(ialu_loadI); +%} + + +// ============================================================================ +// Safepoint Instruction +instruct safePoint_poll_reg(mRegP poll) %{ + match(SafePoint poll); + predicate(false); + effect(USE poll); + + ins_cost(125); + format %{ "Safepoint @ [$poll] : poll for GC @ safePoint_poll_reg" %} + + ins_encode %{ + Register poll_reg = $poll$$Register; + + __ block_comment("Safepoint:"); + __ relocate(relocInfo::poll_type); + __ lw(AT, poll_reg, 0); + %} + + ins_pipe( ialu_storeI ); +%} + +instruct safePoint_poll() %{ + match(SafePoint); + + ins_cost(105); + format %{ "poll for GC @ safePoint_poll" %} + + ins_encode %{ + __ block_comment("Safepoint:"); + __ set64(T9, (long)os::get_polling_page()); + __ relocate(relocInfo::poll_type); + __ lw(AT, T9, 0); + %} + + ins_pipe( ialu_storeI ); +%} + +//----------Arithmetic Conversion Instructions--------------------------------- + +instruct roundFloat_nop(regF dst) +%{ + match(Set dst (RoundFloat dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +instruct roundDouble_nop(regD dst) +%{ + match(Set dst (RoundDouble dst)); + + ins_cost(0); + ins_encode(); + ins_pipe(empty); +%} + +//---------- Zeros Count Instructions ------------------------------------------ +// CountLeadingZerosINode CountTrailingZerosINode +instruct countLeadingZerosI(mRegI dst, mRegI src) %{ + predicate(UseCountLeadingZerosInstructionMIPS64); + match(Set dst (CountLeadingZerosI src)); + + format %{ "clz $dst, $src\t# count leading zeros (int)" %} + ins_encode %{ + __ clz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countLeadingZerosL(mRegI dst, mRegL src) %{ + predicate(UseCountLeadingZerosInstructionMIPS64); + match(Set dst (CountLeadingZerosL src)); + + format %{ "dclz $dst, $src\t# count leading zeros (long)" %} + ins_encode %{ + __ dclz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosI(mRegI dst, mRegI src) %{ + predicate(UseCountTrailingZerosInstructionMIPS64); + match(Set dst (CountTrailingZerosI src)); + + format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} + ins_encode %{ + // ctz and dctz is gs instructions. 
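+    // For reference, a portable equivalent of what ctz computes (a sketch, not
+    // emitted code; the loop form is only illustrative):
+    //
+    //   int ctz32(uint32_t x) {
+    //     if (x == 0) return 32;
+    //     int n = 0;
+    //     while ((x & 1) == 0) { x >>= 1; n++; }
+    //     return n;
+    //   }
+    //
+    // dctz in countTrailingZerosL below is the 64-bit counterpart.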
+ __ ctz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +instruct countTrailingZerosL(mRegI dst, mRegL src) %{ + predicate(UseCountTrailingZerosInstructionMIPS64); + match(Set dst (CountTrailingZerosL src)); + + format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} + ins_encode %{ + __ dctz($dst$$Register, $src$$Register); + %} + ins_pipe( ialu_regL_regL ); +%} + +// ====================VECTOR INSTRUCTIONS===================================== + +// Load vectors (8 bytes long) +instruct loadV8(vecD dst, memory mem) %{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(125); + format %{ "load $dst, $mem\t! load vector (8 bytes)" %} + ins_encode(load_D_enc(dst, mem)); + ins_pipe( fpu_loadF ); +%} + +// Store vectors (8 bytes long) +instruct storeV8(memory mem, vecD src) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(145); + format %{ "store $mem, $src\t! store vector (8 bytes)" %} + ins_encode(store_D_reg_enc(mem, src)); + ins_pipe( fpu_storeF ); +%} + +instruct Repl8B_DSP(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8 && UseLEXT3); + match(Set dst (ReplicateB src)); + ins_cost(100); + format %{ "replv_ob AT, $src\n\t" + "dmtc1 AT, $dst\t! replicate8B" %} + ins_encode %{ + __ replv_ob(AT, $src$$Register); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(140); + format %{ "move AT, $src\n\t" + "dins AT, AT, 8, 8\n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate8B" %} + ins_encode %{ + __ move(AT, $src$$Register); + __ dins(AT, AT, 8, 8); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_imm_DSP(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 8 && UseLEXT3); + match(Set dst (ReplicateB con)); + ins_cost(110); + format %{ "repl_ob AT, [$con]\n\t" + "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} + ins_encode %{ + int val = $con$$constant; + __ repl_ob(AT, val); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + ins_cost(150); + format %{ "move AT, [$con]\n\t" + "dins AT, AT, 8, 8\n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} + ins_encode %{ + __ move(AT, $con$$constant); + __ dins(AT, AT, 8, 8); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_zero(vecD dst, immI_0 zero) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB zero)); + ins_cost(90); + format %{ "dmtc1 R0, $dst\t! replicate8B zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB M1)); + ins_cost(80); + format %{ "dmtc1 -1, $dst\t! 
replicate8B -1" %} + ins_encode %{ + __ nor(AT, R0, R0); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_DSP(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 4 && UseLEXT3); + match(Set dst (ReplicateS src)); + ins_cost(100); + format %{ "replv_qh AT, $src\n\t" + "dmtc1 AT, $dst\t! replicate4S" %} + ins_encode %{ + __ replv_qh(AT, $src$$Register); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + ins_cost(120); + format %{ "move AT, $src \n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate4S" %} + ins_encode %{ + __ move(AT, $src$$Register); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_imm_DSP(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 4 && UseLEXT3); + match(Set dst (ReplicateS con)); + ins_cost(100); + format %{ "repl_qh AT, [$con]\n\t" + "dmtc1 AT, $dst\t! replicate4S($con)" %} + ins_encode %{ + int val = $con$$constant; + if ( Assembler::is_simm(val, 10)) { + //repl_qh supports 10 bits immediate + __ repl_qh(AT, val); + } else { + __ li32(AT, val); + __ replv_qh(AT, AT); + } + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_imm(vecD dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + ins_cost(110); + format %{ "move AT, [$con]\n\t" + "dins AT, AT, 16, 16\n\t" + "dinsu AT, AT, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate4S($con)" %} + ins_encode %{ + __ move(AT, $con$$constant); + __ dins(AT, AT, 16, 16); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_zero(vecD dst, immI_0 zero) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS zero)); + format %{ "dmtc1 R0, $dst\t! replicate4S zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS M1)); + format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} + ins_encode %{ + __ nor(AT, R0, R0); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar to be vector +instruct Repl2I(vecD dst, mRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + format %{ "dins AT, $src, 0, 32\n\t" + "dinsu AT, $src, 32, 32\n\t" + "dmtc1 AT, $dst\t! replicate2I" %} + ins_encode %{ + __ dins(AT, $src$$Register, 0, 32); + __ dinsu(AT, $src$$Register, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. +instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + effect(KILL tmp); + format %{ "li32 AT, [$con], 32\n\t" + "dinsu AT, AT\n\t" + "dmtc1 AT, $dst\t! 
replicate2I($con)" %} + ins_encode %{ + int val = $con$$constant; + __ li32(AT, val); + __ dinsu(AT, AT, 32, 32); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar zero to be vector +instruct Repl2I_zero(vecD dst, immI_0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI zero)); + format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate integer (4 byte) scalar -1 to be vector +instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI M1)); + format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} + ins_encode %{ + __ nor(AT, R0, R0); + __ dmtc1(AT, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + +// Replicate float (4 byte) scalar to be vector +instruct Repl2F(vecD dst, regF src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} + ins_encode %{ + __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +// Replicate float (4 byte) scalar zero to be vector +instruct Repl2F_zero(vecD dst, immF_0 zero) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF zero)); + format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} + ins_encode %{ + __ dmtc1(R0, $dst$$FloatRegister); + %} + ins_pipe( pipe_mtc1 ); +%} + + +// ====================VECTOR ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- + +// Floats vector add +// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. +instruct vadd2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF dst src)); + format %{ "add.ps $dst,$src\t! add packed2F" %} + ins_encode %{ + __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( pipe_slow ); +%} + +instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} + ins_encode %{ + __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// --------------------------------- SUB -------------------------------------- + +// Floats vector sub +instruct vsub2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVF dst src)); + format %{ "sub.ps $dst,$src\t! sub packed2F" %} + ins_encode %{ + __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// --------------------------------- MUL -------------------------------------- + +// Floats vector mul +instruct vmul2F(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF dst src)); + format %{ "mul.ps $dst, $src\t! mul packed2F" %} + ins_encode %{ + __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF src1 src2)); + format %{ "mul.ps $dst, $src1, $src2\t! 
mul packed2F" %} + ins_encode %{ + __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( fpu_regF_regF ); +%} + +// --------------------------------- DIV -------------------------------------- +// MIPS do not have div.ps + +// --------------------------------- MADD -------------------------------------- +// Floats vector madd +//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ +// predicate(n->as_Vector()->length() == 2); +// match(Set dst (AddVF (MulVF src1 src2) src3)); +// ins_cost(50); +// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} +// ins_encode %{ +// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); +// %} +// ins_pipe( fpu_regF_regF ); +//%} + + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. +// +// peepmatch ( root_instr_name [preceeding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] ); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line. +// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == EAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(eRegI dst, eRegI src) %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceeded by register-register move +// peepmatch ( incI_eReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// Implementation no longer uses movX instructions since +// machine-independent system no longer uses CopyX nodes. 
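+//
+// A hypothetical MIPS-flavoured rule of the same shape (not enabled here; the
+// instruct names are placeholders and not necessarily defined in this file)
+// would fold a register move into the add that follows it:
+//
+// peephole %{
+//   peepmatch ( addI_Reg_Reg movI_Reg_Reg );
+//   peepconstraint ( 0.dst == 1.dst );
+//   peepreplace ( addI_Reg_Reg( 0.dst 1.src 0.src ) );
+// %}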
+// +// peephole %{ +// peepmatch ( incI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( decI_eReg movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addI_eReg_imm movI ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); +// %} +// +// peephole %{ +// peepmatch ( addP_eReg_imm movP ); +// peepconstraint ( 0.dst == 1.dst ); +// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); +// %} + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, eRegI src) %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(eRegI dst, memory mem) %{ +// match(Set dst (LoadI mem)); +// %} +// +//peephole %{ +// peepmatch ( loadI storeI ); +// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); +// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); +//%} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instructions definitions. + diff --git a/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp b/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp new file mode 100644 index 00000000000..e1f7cd944df --- /dev/null +++ b/hotspot/src/cpu/mips/vm/nativeInst_mips.cpp @@ -0,0 +1,1829 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeBlob.hpp" +#include "code/codeCache.hpp" +#include "compiler/disassembler.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_mips.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" + +#include + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +void NativeInstruction::wrote(int offset) { + ICache::invalidate_word(addr_at(offset)); +} + +void NativeInstruction::set_long_at(int offset, long i) { + address addr = addr_at(offset); + *(long*)addr = i; + ICache::invalidate_range(addr, 8); +} + +static int illegal_instruction_bits = 0; + +int NativeInstruction::illegal_instruction() { + if (illegal_instruction_bits == 0) { + ResourceMark rm; + char buf[40]; + CodeBuffer cbuf((address)&buf[0], 20); + MacroAssembler* a = new MacroAssembler(&cbuf); + address ia = a->pc(); + a->brk(11); + int bits = *(int*)ia; + illegal_instruction_bits = bits; + } + return illegal_instruction_bits; +} + +bool NativeInstruction::is_int_branch() { + switch(Assembler::opcode(insn_word())) { + case Assembler::beq_op: + case Assembler::beql_op: + case Assembler::bgtz_op: + case Assembler::bgtzl_op: + case Assembler::blez_op: + case Assembler::blezl_op: + case Assembler::bne_op: + case Assembler::bnel_op: + return true; + case Assembler::regimm_op: + switch(Assembler::rt(insn_word())) { + case Assembler::bgez_op: + case Assembler::bgezal_op: + case Assembler::bgezall_op: + case Assembler::bgezl_op: + case Assembler::bltz_op: + case Assembler::bltzal_op: + case Assembler::bltzall_op: + case Assembler::bltzl_op: + return true; + } + } + + return false; +} + +bool NativeInstruction::is_float_branch() { + if (!is_op(Assembler::cop1_op) || + !is_rs((Register)Assembler::bc1f_op)) return false; + + switch(Assembler::rt(insn_word())) { + case Assembler::bcf_op: + case Assembler::bcfl_op: + case Assembler::bct_op: + case Assembler::bctl_op: + return true; + } + + return false; +} + + +void NativeCall::verify() { + // make sure code pattern is actually a call instruction + + // nop + // nop + // nop + // nop + // jal target + // nop + if ( is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_op(int_at(16), Assembler::jal_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return; + } + + // jal targe + // nop + if ( is_op(int_at(0), Assembler::jal_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + return; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) && + is_special_op(int_at(24), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 
+ //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + // FIXME: why add jr_op here? + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return; + } + + //daddiu dst, R0, imm16 + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return; + } + + //lui dst, imm16 + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return; + } + + if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) + return; + + fatal("not a call"); +} + +address NativeCall::target_addr_for_insn() const { + // jal target + // nop + if ( is_op(int_at(0), Assembler::jal_op) && + nativeInstruction_at(addr_at(4))->is_nop()) { + int instr_index = int_at(0) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // nop + // nop + // nop + // nop + // jal target + // nop + if ( nativeInstruction_at(addr_at(0))->is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_op(int_at(16), Assembler::jal_op) && + nativeInstruction_at(addr_at(20))->is_nop()) { + int instr_index = int_at(16) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), 
Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), + (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff)); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ld dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ld_op) ) { + + address dest = (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return (address)Assembler::merge( (intptr_t)(0), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //daddiu dst, R0, imm16 + //nop + //nop <-- optional + //nop <-- optional + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop <-- optional + //nop <-- optional + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //nop + //nop <-- optional + //nop <-- optional + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return (address)Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); + tty->print_cr("======= Start decoding at addr = " 
INTPTR_FORMAT " =======", p2i(addr_at(0))); + Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); + tty->print_cr("======= End of decoding ======="); + fatal("not a call"); + return NULL; +} + +// Extract call destination from a NativeCall. The call might use a trampoline stub. +address NativeCall::destination() const { + address addr = (address)this; + address destination = target_addr_for_insn(); + // Do we use a trampoline stub for this call? + // Trampoline stubs are located behind the main code. + if (destination > addr) { + // Filter out recursive method invocation (call to verified/unverified entry point). + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. + assert(cb && cb->is_nmethod(), "sanity"); + nmethod *nm = (nmethod *)cb; + NativeInstruction* ni = nativeInstruction_at(addr); + if (nm->stub_contains(destination) && ni->is_trampoline_call()) { + // Yes we do, so get the destination from the trampoline stub. + const address trampoline_stub_addr = destination; + destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + } + return destination; +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. +// +// Used in the runtime linkage of calls; see class CompiledIC. +// +// Add parameter assert_lock to switch off assertion +// during code generation, where no patching lock is needed. +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || + (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + address addr_call = addr_at(0); + assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + // Patch the constant in the call's trampoline stub. + if (MacroAssembler::reachable_from_cache()) { + set_destination(dest); + } else { + address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); + assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); + nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); + } +} + + +address NativeCall::get_trampoline() { + address call_addr = addr_at(0); + + CodeBlob *code = CodeCache::find_blob(call_addr); + assert(code != NULL, "Could not find the containing code blob"); + + // If the codeBlob is not a nmethod, this is because we get here from the + // CodeBlob constructor, which is called within the nmethod constructor. 
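+  // Roughly, the resolution path handled by destination() above (a sketch; the
+  // layout is simplified):
+  //
+  //   call site (jal/jalr) ---> trampoline stub behind the main code
+  //   trampoline stub      ---> real destination, kept as a patchable constant
+  //
+  // so set_destination_mt_safe() only has to rewrite the trampoline's constant
+  // when the target is not reachable from the code cache.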
+ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); +} + +// manual implementation of GSSQ +// +// 00000001200009c0 : +// 1200009c0: 0085202d daddu a0, a0, a1 +// 1200009c4: e8860027 gssq a2, a3, 0(a0) +// 1200009c8: 03e00008 jr ra +// 1200009cc: 00000000 nop +// +typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); + +static int *buf; + +static atomic_store128_ptr get_atomic_store128_func() { + assert(UseLEXT1, "UseLEXT1 must be true"); + static atomic_store128_ptr p = NULL; + if (p != NULL) + return p; + + buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + buf[0] = 0x0085202d; + buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ + buf[2] = 0x03e00008; + buf[3] = 0; + + asm("sync"); + p = (atomic_store128_ptr)buf; + return p; +} + +void NativeCall::patch_on_jal_only(address dst) { + long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint jal_inst = (Assembler::jal_op << 26) | dest; + set_int_at(0, jal_inst); + ICache::invalidate_range(addr_at(0), 4); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::patch_on_trampoline(address dest) { + assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); + jlong dst = (jlong) dest; + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ld dst, dst, imm16 + if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { + dst += (dst & 0x8000) << 1; + set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); + + ICache::invalidate_range(addr_at(0), 24); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::patch_on_jal_gs(address dst) { + long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint jal_inst = (Assembler::jal_op << 26) | dest; + set_int_at(16, jal_inst); + ICache::invalidate_range(addr_at(16), 4); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::patch_on_jal(address dst) { + patch_on_jal_gs(dst); +} + +void NativeCall::patch_on_jalr_gs(address dst) { + patch_set48_gs(dst); +} + +void NativeCall::patch_on_jalr(address dst) { + patch_set48(dst); +} + +void NativeCall::patch_set48_gs(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + int count = 0; + int insts[4] = {0, 0, 0, 0}; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); + count += 1; + } else { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); + count += 1; + if (Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + if 
(Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + insts[count] = 0; + count++; + } + + guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); + atomic_store128_ptr func = get_atomic_store128_func(); + (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeCall::patch_set32_gs(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + int insts[2] = {0, 0}; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + //daddiu(d, R0, value); + //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); + count += 1; + } else { + //lui(d, split_low(value >> 16)); + //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 2) { + //nop(); + //set_int_at(count << 2, 0); + insts[count] = 0; + count++; + } + + long inst = insts[1]; + inst = inst << 32; + inst = inst + insts[0]; + + set_long_at(0, inst); +} + +void NativeCall::patch_set48(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + //daddiu(d, R0, value); + set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + //lui(d, split_low(value >> 16)); + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + //ori(d, R0, julong(value) >> 16); + set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | 
Assembler::split_low(julong(value) >> 16)); + count += 1; + //dsll(d, d, 16); + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + //lui(d, value >> 32); + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); + count += 1; + //ori(d, d, split_low(value >> 16)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + //dsll(d, d, 16); + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + //nop(); + set_int_at(count << 2, 0); + count++; + } + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeCall::patch_set32(address dest) { + patch_set32_gs(dest); +} + +void NativeCall::set_destination(address dest) { + OrderAccess::fence(); + + // li64 + if (is_special_op(int_at(16), Assembler::dsll_op)) { + int first_word = int_at(0); + set_int_at(0, 0x1000ffff); /* .1: b .1 */ + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); + set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); + set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); + ICache::invalidate_range(addr_at(0), 24); + } else if (is_op(int_at(16), Assembler::jal_op)) { + if (UseLEXT1) { + patch_on_jal_gs(dest); + } else { + patch_on_jal(dest); + } + } else if (is_op(int_at(0), Assembler::jal_op)) { + patch_on_jal_only(dest); + } else if (is_special_op(int_at(16), Assembler::jalr_op)) { + if (UseLEXT1) { + patch_on_jalr_gs(dest); + } else { + patch_on_jalr(dest); + } + } else if (is_special_op(int_at(8), Assembler::jalr_op)) { + guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); + if (UseLEXT1) { + patch_set32_gs(dest); + } else { + patch_set32(dest); + } + ICache::invalidate_range(addr_at(0), 8); + } else { + fatal("not a call"); + } +} + +void NativeCall::print() { + tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, + p2i(instruction_address()), p2i(destination())); +} + +// Inserts a native call instruction at a given pc +void NativeCall::insert(address code_pos, address entry) { + NativeCall *call = nativeCall_at(code_pos); + CodeBuffer cb(call->addr_at(0), instruction_size); + MacroAssembler masm(&cb); +#define __ masm. + __ li48(T9, (long)entry); + __ jalr (); + __ delayed()->nop(); +#undef __ + + ICache::invalidate_range(call->addr_at(0), instruction_size); +} + +// MT-safe patching of a call instruction. +// First patches first word of instruction to two jmp's that jmps to them +// selfs (spinlock). Then patches the last byte, and then atomicly replaces +// the jmp's with the first 4 byte of the new instruction. 
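+//
+// A sketch of that idea as used by set_destination() above for the li64 form:
+//
+//   1. overwrite the first word with "b ." (0x1000ffff) so a concurrently
+//      executing thread spins there instead of seeing a half-patched sequence;
+//   2. patch the remaining immediate words of the li64;
+//   3. write the final first word, releasing any spinning thread;
+//   4. invalidate the icache for the whole 24-byte range.
+//
+// replace_mt_safe() itself is left Unimplemented() in this port.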
+void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { + Unimplemented(); +} + +//------------------------------------------------------------------- + +void NativeMovConstReg::verify() { + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + return; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop()) { + return; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + return; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + return; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + return; + } + + fatal("not a mov reg, imm64/imm48"); +} + +void NativeMovConstReg::print() { + tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, + p2i(instruction_address()), data()); +} + +intptr_t NativeMovConstReg::data() const { + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + + return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), + (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff)); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + + return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + 
//nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return Assembler::merge( (intptr_t)(0), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0, + (intptr_t)0); + } else { + return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() ) { + + int sign = int_at(0) & 0x8000; + if (sign == 0) { + return Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } else { + return Assembler::merge( (intptr_t)0, + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)(0xffff), + (intptr_t)(0xffff)); + } + } + + fatal("not a mov reg, imm64/imm48"); + return 0; // unreachable +} + +void NativeMovConstReg::patch_set48(intptr_t x) { + jlong value = (jlong) x; + int rt_reg = (int_at(0) & (0x1f << 16)); + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + //daddiu(d, R0, value); + set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + //lui(d, split_low(value >> 16)); + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + if (Assembler::split_low(value)) { + //ori(d, d, split_low(value)); + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); + count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + if (Assembler::split_low(value)) { + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); + 
count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + tty->print_cr("value = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + set_int_at(count << 2, 0); + count++; + } +} + +void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { + // li64 or li48 + if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { + set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); + set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); + set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); + } else { + patch_set48(x); + } + + ICache::invalidate_range(addr_at(0), 24); + + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. + CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); + nmethod* nm = blob->as_nmethod_or_null(); + if (nm != NULL) { + o = o ? o : x; + RelocIterator iter(nm, instruction_address(), next_instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop* oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(o); + break; + } else if (iter.type() == relocInfo::metadata_type) { + Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)o; + break; + } + } + } +} + +//------------------------------------------------------------------- + +int NativeMovRegMem::offset() const{ + if (is_immediate()) + return (short)(int_at(instruction_offset)&0xffff); + else + return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); +} + +void NativeMovRegMem::set_offset(int x) { + if (is_immediate()) { + assert(Assembler::is_simm16(x), "just check"); + set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); + if (is_64ldst()) { + assert(Assembler::is_simm16(x+4), "just check"); + set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); + } + } else { + set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); + set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); + } + ICache::invalidate_range(addr_at(0), 8); +} + +void NativeMovRegMem::verify() { + int offset = 0; + + if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { + + if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { + fatal ("not a mov [reg+offs], reg instruction"); + } + + offset += 12; + } + + switch(Assembler::opcode(int_at(offset))) { + case Assembler::lb_op: + case Assembler::lbu_op: + case Assembler::lh_op: + case Assembler::lhu_op: + case Assembler::lw_op: + case Assembler::lwu_op: + case Assembler::ld_op: + case Assembler::lwc1_op: + case Assembler::ldc1_op: + case Assembler::sb_op: + case Assembler::sh_op: + case Assembler::sw_op: + case Assembler::sd_op: + case Assembler::swc1_op: + case Assembler::sdc1_op: + break; + default: + fatal ("not a mov [reg+offs], reg instruction"); + } +} + + +void NativeMovRegMem::print() { + tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); +} + +bool NativeInstruction::is_sigill_zombie_not_entrant() { + return uint_at(0) == 
NativeIllegalInstruction::instruction_code; +} + +void NativeIllegalInstruction::insert(address code_pos) { + *(juint*)code_pos = instruction_code; + ICache::invalidate_range(code_pos, instruction_size); +} + +void NativeJump::verify() { + assert(((NativeInstruction *)this)->is_jump() || + ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); +} + +void NativeJump::patch_set48_gs(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + + if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 + + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + int insts[4] = {0, 0, 0, 0}; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); + count += 1; + } else { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); + count += 1; + if (Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + if (Assembler::split_low(value)) { + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); + count += 1; + insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); + count += 1; + insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + insts[count] = 0; + count++; + } + + guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); + atomic_store128_ptr func = get_atomic_store128_func(); + (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeJump::patch_set48(address dest) { + jlong value = (jlong) dest; + int rt_reg = (int_at(0) & (0x1f << 16)); + int rs_reg = rt_reg << 5; + int rd_reg = rt_reg >> 5; + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + + int count = 0; + + if (value == lo) { // 32-bit integer + if (Assembler::is_simm16(value)) { + set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + if (Assembler::split_low(value)) { + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } + } + } else if (hi == 0) { // hardware zero-extends to upper 32 + set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); + count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + if (Assembler::split_low(value)) { + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | 
rt_reg | Assembler::split_low(value)); + count += 1; + } + } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { + set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); + count += 1; + set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); + count += 1; + set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); + count += 1; + } else { + tty->print_cr("dest = 0x%lx", value); + guarantee(false, "Not supported yet !"); + } + + while (count < 4) { + set_int_at(count << 2, 0); + count++; + } + + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeJump::patch_on_j_only(address dst) { + long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint j_inst = (Assembler::j_op << 26) | dest; + set_int_at(0, j_inst); + ICache::invalidate_range(addr_at(0), 4); + } else { + ShouldNotReachHere(); + } +} + + +void NativeJump::patch_on_j_gs(address dst) { + long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; + if ((dest >= 0) && (dest < (1<<26))) { + jint j_inst = (Assembler::j_op << 26) | dest; + set_int_at(16, j_inst); + ICache::invalidate_range(addr_at(16), 4); + } else { + ShouldNotReachHere(); + } +} + +void NativeJump::patch_on_j(address dst) { + patch_on_j_gs(dst); +} + +void NativeJump::patch_on_jr_gs(address dst) { + patch_set48_gs(dst); + ICache::invalidate_range(addr_at(0), 16); +} + +void NativeJump::patch_on_jr(address dst) { + patch_set48(dst); + ICache::invalidate_range(addr_at(0), 16); +} + + +void NativeJump::set_jump_destination(address dest) { + OrderAccess::fence(); + + if (is_short()) { + assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); + set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); + ICache::invalidate_range(addr_at(0), 4); + } else if (is_b_far()) { + int offset = dest - addr_at(12); + set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); + set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); + } else { + if (is_op(int_at(16), Assembler::j_op)) { + if (UseLEXT1) { + patch_on_j_gs(dest); + } else { + patch_on_j(dest); + } + } else if (is_op(int_at(0), Assembler::j_op)) { + patch_on_j_only(dest); + } else if (is_special_op(int_at(16), Assembler::jr_op)) { + if (UseLEXT1) { + //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); + //patch_on_jr_gs(dest); + patch_on_jr(dest); + } else { + patch_on_jr(dest); + } + } else { + fatal("not a jump"); + } + } +} + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + CodeBuffer cb(code_pos, instruction_size); + MacroAssembler masm(&cb); +#define __ masm. + if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { + __ b(entry); + __ delayed()->nop(); + } else { + // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. 
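+    // Sketch of what the sequence below computes (illustration only, with made-up
+    // addresses; not generated code): bgezal with rs == R0 always branches and
+    // leaves RA == address_of(bgezal) + 8. So with code_pos == 0x120003000 and
+    // entry == 0x120103000, offset == 0x100000; the lui/ori pair builds
+    // offset - 8 == 0xffff8 in T9, and daddu(T9, T9, RA) gives
+    // 0xffff8 + 0x120003008 == 0x120103000 == entry, which jr T9 then reaches.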
+ int offset = entry - code_pos; + + Label L; + __ bgezal(R0, L); + __ delayed()->lui(T9, (offset - 8) >> 16); + __ bind(L); + __ ori(T9, T9, (offset - 8) & 0xffff); + __ daddu(T9, T9, RA); + __ jr(T9); + __ delayed()->nop(); + } + +#undef __ + + ICache::invalidate_range(code_pos, instruction_size); +} + +bool NativeJump::is_b_far() { +// +// 0x000000556809f198: daddu at, ra, zero +// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 +// +// 0x000000556809f1a0: nop +// 0x000000556809f1a4: lui t9, 0xfffffffd +// 0x000000556809f1a8: ori t9, t9, 0x14dc +// 0x000000556809f1ac: daddu t9, t9, ra +// 0x000000556809f1b0: daddu ra, at, zero +// 0x000000556809f1b4: jr t9 +// 0x000000556809f1b8: nop +// ;; ImplicitNullCheckStub slow case +// 0x000000556809f1bc: lui t9, 0x55 +// + return is_op(int_at(12), Assembler::lui_op); +} + +address NativeJump::jump_destination() { + if ( is_short() ) { + return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; + } + // Assembler::merge() is not correct in MIPS_64! + // + // Example: + // hi16 = 0xfffd, + // lo16 = f7a4, + // + // offset=0xfffdf7a4 (Right) + // Assembler::merge = 0xfffcf7a4 (Wrong) + // + if ( is_b_far() ) { + int hi16 = int_at(12)&0xffff; + int low16 = int_at(16)&0xffff; + address target = addr_at(12) + (hi16 << 16) + low16; + return target; + } + + // nop + // nop + // nop + // nop + // j target + // nop + if ( nativeInstruction_at(addr_at(0))->is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_op(int_at(16), Assembler::j_op) && + nativeInstruction_at(addr_at(20))->is_nop()) { + int instr_index = int_at(16) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // j target + // nop + if ( is_op(int_at(0), Assembler::j_op) && + nativeInstruction_at(addr_at(4))->is_nop()) { + int instr_index = int_at(0) & 0x3ffffff; + intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; + intptr_t target = target_high | (instr_index << 2); + return (address)target; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), + (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff)); + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) ) { + + return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), + (intptr_t)(int_at(4) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop()) { + + return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), + (intptr_t)(int_at(0) & 0xffff), + (intptr_t)0, + (intptr_t)0); + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( 
is_op(Assembler::ori_op) &&
+       is_special_op(int_at(4), Assembler::dsll_op) &&
+       nativeInstruction_at(addr_at(8))->is_nop() &&
+       nativeInstruction_at(addr_at(12))->is_nop()) {
+
+    return (address)Assembler::merge( (intptr_t)(0),
+                                      (intptr_t)(int_at(0) & 0xffff),
+                                      (intptr_t)0,
+                                      (intptr_t)0);
+  }
+
+  //daddiu dst, R0, imm16
+  //nop
+  //nop
+  //nop
+  if ( is_op(Assembler::daddiu_op) &&
+       nativeInstruction_at(addr_at(4))->is_nop() &&
+       nativeInstruction_at(addr_at(8))->is_nop() &&
+       nativeInstruction_at(addr_at(12))->is_nop() ) {
+
+    int sign = int_at(0) & 0x8000;
+    if (sign == 0) {
+      return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)0,
+                                        (intptr_t)0,
+                                        (intptr_t)0);
+    } else {
+      return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)(0xffff),
+                                        (intptr_t)(0xffff),
+                                        (intptr_t)(0xffff));
+    }
+  }
+
+  //lui dst, imm16
+  //ori dst, dst, imm16
+  //nop
+  //nop
+  if ( is_op(Assembler::lui_op) &&
+       is_op (int_at(4), Assembler::ori_op) &&
+       nativeInstruction_at(addr_at(8))->is_nop() &&
+       nativeInstruction_at(addr_at(12))->is_nop() ) {
+
+    int sign = int_at(0) & 0x8000;
+    if (sign == 0) {
+      return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff),
+                                        (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)0,
+                                        (intptr_t)0);
+    } else {
+      return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff),
+                                        (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)(0xffff),
+                                        (intptr_t)(0xffff));
+    }
+  }
+
+  //lui dst, imm16
+  //nop
+  //nop
+  //nop
+  if ( is_op(Assembler::lui_op) &&
+       nativeInstruction_at(addr_at(4))->is_nop() &&
+       nativeInstruction_at(addr_at(8))->is_nop() &&
+       nativeInstruction_at(addr_at(12))->is_nop() ) {
+
+    int sign = int_at(0) & 0x8000;
+    if (sign == 0) {
+      return (address)Assembler::merge( (intptr_t)0,
+                                        (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)0,
+                                        (intptr_t)0);
+    } else {
+      return (address)Assembler::merge( (intptr_t)0,
+                                        (intptr_t)(int_at(0) & 0xffff),
+                                        (intptr_t)(0xffff),
+                                        (intptr_t)(0xffff));
+    }
+  }
+
+  fatal("not a jump");
+  return NULL; // unreachable
+}
+
+// MT-safe patching of a long jump instruction sequence.
+// First patches the jump word of the sequence with a branch that spins in place
+// (a spinlock for any thread already executing the old code), then copies in the
+// leading words of the new instruction, and finally replaces the spinning branch
+// atomically with the last two words of the new sequence.
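+//
+// A sketch of the intended memory states for the 6-word jump site patched below
+// (byte offsets; "b .1" is the spin branch 0x1000fffb; illustration only):
+//
+//   initial:       [0..12] old set48 words   [16..20] old jr + delay slot
+//   after step 1:  [0..12] old set48 words   [16] b .1 (spin)   [20] old delay slot
+//   after step 2:  [0..12] new set48 words   [16] b .1 (spin)   [20] old delay slot
+//   after step 3:  [0..12] new set48 words   [16..20] new jr + delay slot  (one 64-bit store)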
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); + assert((int)instruction_size == (int)NativeCall::instruction_size, + "note::Runtime1::patch_code uses NativeCall::instruction_size"); + + // ensure 100% atomicity + guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); + + int *p = (int *)instr_addr; + int jr_word = p[4]; + + p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ + memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); + *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); +} + +// Must ensure atomicity +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); + + if (MacroAssembler::reachable_from_cache(dest)) { + CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); + MacroAssembler masm(&cb); + masm.j(dest); + } else { + // We use an illegal instruction for marking a method as + // not_entrant or zombie + NativeIllegalInstruction::insert(verified_entry); + } + + ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); +} + +bool NativeInstruction::is_jump() +{ + if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) + return true; + if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far + return true; + if (is_op(int_at(12), Assembler::lui_op)) // original b_far + return true; + + // nop + // nop + // nop + // nop + // j target + // nop + if ( is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return true; + } + + if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + return true; + } + + // lui rd, imm(63...48); + // ori rd, rd, imm(47...32); + // dsll rd, rd, 16; + // ori rd, rd, imm(31...16); + // dsll rd, rd, 16; + // ori rd, rd, imm(15...0); + // jr rd + // nop + if (is_op(int_at(0), Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) && + is_special_op(int_at(24), Assembler::jr_op)) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if (is_op(int_at(0), Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op (int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && 
+ nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jr_op)) { + return true; + } + + return false; +} + +bool NativeInstruction::is_dtrace_trap() { + //return (*(int32_t*)this & 0xff) == 0xcc; + Unimplemented(); + return false; +} + +bool NativeInstruction::is_safepoint_poll() { + // + // 390 li T2, 0x0000000000400000 #@loadConP + // 394 sw [SP + #12], V1 # spill 9 + // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 + // + // 0x000000ffe5815130: lui t2, 0x40 + // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} + // ;*goto + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC + // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) + // + + // Since there may be some spill instructions between the safePoint_poll and loadConP, + // we check the safepoint instruction like the this. + return is_op(Assembler::lw_op) && is_rt(AT); +} diff --git a/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp b/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp new file mode 100644 index 00000000000..13a4cb4ef1c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/nativeInst_mips.hpp @@ -0,0 +1,735 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP +#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP + +#include "asm/assembler.hpp" +#include "memory/allocation.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" +#include "utilities/top.hpp" + +// We have interfaces for the following instructions: +// - NativeInstruction +// - - NativeCall +// - - NativeMovConstReg +// - - NativeMovConstRegPatching +// - - NativeMovRegMem +// - - NativeMovRegMemPatching +// - - NativeJump +// - - NativeIllegalOpCode +// - - NativeGeneralJump +// - - NativeReturn +// - - NativeReturnX (return with argument) +// - - NativePushConst +// - - NativeTstRegMem + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. + +class NativeInstruction VALUE_OBJ_CLASS_SPEC { + friend class Relocation; + + public: + enum mips_specific_constants { + nop_instruction_code = 0, + nop_instruction_size = 4, + sync_instruction_code = 0xf + }; + + bool is_nop() { return long_at(0) == nop_instruction_code; } + bool is_sync() { return long_at(0) == sync_instruction_code; } + bool is_dtrace_trap(); + inline bool is_call(); + inline bool is_illegal(); + inline bool is_return(); + bool is_jump(); + inline bool is_cond_jump(); + bool is_safepoint_poll(); + + //mips has no instruction to generate a illegal instrucion exception + //we define ours: break 11 + static int illegal_instruction(); + + bool is_int_branch(); + bool is_float_branch(); + + inline bool is_trampoline_call(); + + //We use an illegal instruction for marking a method as not_entrant or zombie. + bool is_sigill_zombie_not_entrant(); + + protected: + address addr_at(int offset) const { return address(this) + offset; } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(BytesPerInstWord); } + address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } + + s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } + u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } + + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + + intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } + + oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + int long_at(int offset) const { return *(jint*)addr_at(offset); } + + + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } + void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } + void set_long_at(int offset, long i); + + int insn_word() const { return long_at(0); } + static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } + bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } + bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } + bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } + bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } + bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } + + static bool is_special_op (int insn, Assembler::special_ops op) { + return is_op(insn, 
Assembler::special_op) && Assembler::special(insn)==(int)op; + } + bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } + + void wrote(int offset); + + public: + + // unit test stuff + static void test() {} // override for testing + + inline friend NativeInstruction* nativeInstruction_at(address address); +}; + +inline NativeInstruction* nativeInstruction_at(address address) { + NativeInstruction* inst = (NativeInstruction*)address; +#ifdef ASSERT + //inst->verify(); +#endif + return inst; +} + +inline NativeCall* nativeCall_at(address address); +// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 +// instructions (used to manipulate inline caches, primitive & dll calls, etc.). +// MIPS has no call instruction with imm32/imm64. Usually, a call was done like this: +// 32 bits: +// lui rt, imm16 +// addiu rt, rt, imm16 +// jalr rt +// nop +// +// 64 bits: +// lui rd, imm(63...48); +// ori rd, rd, imm(47...32); +// dsll rd, rd, 16; +// ori rd, rd, imm(31...16); +// dsll rd, rd, 16; +// ori rd, rd, imm(15...0); +// jalr rd +// nop +// + +// we just consider the above for instruction as one call instruction +class NativeCall: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + instruction_size = 6 * BytesPerInstWord, + return_address_offset_short = 4 * BytesPerInstWord, + return_address_offset_long = 6 * BytesPerInstWord, + displacement_offset = 0 + }; + + address instruction_address() const { return addr_at(instruction_offset); } + + address next_instruction_address() const { + if (is_special_op(int_at(8), Assembler::jalr_op)) { + return addr_at(return_address_offset_short); + } else { + return addr_at(return_address_offset_long); + } + } + + address return_address() const { + return next_instruction_address(); + } + + address target_addr_for_insn() const; + address destination() const; + void set_destination(address dest); + + void patch_set48_gs(address dest); + void patch_set48(address dest); + + void patch_on_jalr_gs(address dest); + void patch_on_jalr(address dest); + + void patch_on_jal_gs(address dest); + void patch_on_jal(address dest); + + void patch_on_trampoline(address dest); + + void patch_on_jal_only(address dest); + + void patch_set32_gs(address dest); + void patch_set32(address dest); + + void verify_alignment() { } + void verify(); + void print(); + + // Creation + inline friend NativeCall* nativeCall_at(address address); + inline friend NativeCall* nativeCall_before(address return_address); + + static bool is_call_at(address instr) { + return nativeInstruction_at(instr)->is_call(); + } + + static bool is_call_before(address return_address) { + return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); + } + + static bool is_call_to(address instr, address target) { + return nativeInstruction_at(instr)->is_call() && +nativeCall_at(instr)->destination() == target; + } + + // MT-safe patching of a call instruction. + static void insert(address code_pos, address entry); + + static void replace_mt_safe(address instr_addr, address code_buffer); + + // Similar to replace_mt_safe, but just changes the destination. The + // important thing is that free-running threads are able to execute + // this call instruction at all times. If the call is an immediate jal + // instruction we can simply rely on atomicity of 32-bit writes to + // make sure other threads will see no intermediate states. 
+ + // We cannot rely on locks here, since the free-running threads must run at + // full speed. + // + // Used in the runtime linkage of calls; see class CompiledIC. + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); +}; + +inline NativeCall* nativeCall_at(address address) { + NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +inline NativeCall* nativeCall_before(address return_address) { + NativeCall* call = NULL; + if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { + call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); + } else { + call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); + } +#ifdef ASSERT + call->verify(); +#endif + return call; +} + +class NativeMovConstReg: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + instruction_size = 4 * BytesPerInstWord, + next_instruction_offset = 4 * BytesPerInstWord, + }; + + int insn_word() const { return long_at(instruction_offset); } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + intptr_t data() const; + void set_data(intptr_t x, intptr_t o = 0); + + void patch_set48(intptr_t x); + + void verify(); + void print(); + + // unit test stuff + static void test() {} + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address address); + inline friend NativeMovConstReg* nativeMovConstReg_before(address address); +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +inline NativeMovConstReg* nativeMovConstReg_before(address address) { + NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovConstRegPatching: public NativeMovConstReg { + private: + friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { + NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + +// An interface for accessing/manipulating native moves of the form: +// lui AT, split_high(offset) +// addiu AT, split_low(offset) +// addu reg, reg, AT +// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 +// [lw/sw/lwc1/swc1 dest, reg, 4] +// or +// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset +// [lw/sw/lwc1/swc1 dest, reg, offset+4] +// +// Warning: These routines must be able to handle any instruction sequences +// that are generated as a result of the load/store byte,word,long +// macros. + +class NativeMovRegMem: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + hiword_offset = 4, + ldst_offset = 12, + immediate_size = 4, + ldst_size = 16 + }; + + //offset is less than 16 bits. 
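+  // In the immediate case the first word is not a lui, so the access is the short
+  // form shown above: a single load/store whose 16-bit displacement is encoded
+  // directly, possibly followed by a second lw/sw/lwc1/swc1 at offset + wordSize
+  // when one value is split over two accesses (see is_64ldst() below).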
+ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } + bool is_64ldst() const { + if (is_immediate()) { + return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && + (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); + } else { + return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && + (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); + } + } + + address instruction_address() const { return addr_at(instruction_offset); } + address next_instruction_address() const { + return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); + } + + int offset() const; + + void set_offset(int x); + + void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } + + void verify(); + void print (); + + // unit test stuff + static void test() {} + + private: + inline friend NativeMovRegMem* nativeMovRegMem_at (address address); +}; + +inline NativeMovRegMem* nativeMovRegMem_at (address address) { + NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovRegMemPatching: public NativeMovRegMem { + private: + friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { + NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); + #ifdef ASSERT + test->verify(); + #endif + return test; + } +}; + + +// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional +// 32 bits: +// far jump: +// lui reg, split_high(addr) +// addiu reg, split_low(addr) +// jr reg +// nop +// or +// beq ZERO, ZERO, offset +// nop +// + +//64 bits: +// far jump: +// lui rd, imm(63...48); +// ori rd, rd, imm(47...32); +// dsll rd, rd, 16; +// ori rd, rd, imm(31...16); +// dsll rd, rd, 16; +// ori rd, rd, imm(15...0); +// jalr rd +// nop +// +class NativeJump: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_offset = 0, + beq_opcode = 0x10000000,//000100|00000|00000|offset + b_mask = 0xffff0000, + short_size = 8, + instruction_size = 6 * BytesPerInstWord + }; + + bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } + bool is_b_far(); + address instruction_address() const { return addr_at(instruction_offset); } + address jump_destination(); + + void patch_set48_gs(address dest); + void patch_set48(address dest); + + void patch_on_jr_gs(address dest); + void patch_on_jr(address dest); + + void patch_on_j_gs(address dest); + void patch_on_j(address dest); + + void patch_on_j_only(address dest); + + void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + // Insertion of native jump instruction + static void insert(address code_pos, address entry) { Unimplemented(); } + // MT-safe insertion of native jump at verified method entry + static void check_verified_entry_alignment(address entry, address verified_entry) {} + static void patch_verified_entry(address entry, address verified_entry, address dest); + + void verify(); +}; + +inline NativeJump* nativeJump_at(address address) { + NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); + debug_only(jump->verify();) + return jump; +} + +class NativeGeneralJump: 
public NativeJump { + public: + // Creation + inline friend NativeGeneralJump* nativeGeneralJump_at(address address); + + // Insertion of native general jump instruction + static void insert_unconditional(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); +}; + +inline NativeGeneralJump* nativeGeneralJump_at(address address) { + NativeGeneralJump* jump = (NativeGeneralJump*)(address); + debug_only(jump->verify();) + return jump; +} + +class NativeIllegalInstruction: public NativeInstruction { +public: + enum mips_specific_constants { + instruction_code = 0x42000029, // mips reserved instruction + instruction_size = 4, + instruction_offset = 0, + next_instruction_offset = 4 + }; + + // Insert illegal opcode as specific address + static void insert(address code_pos); +}; + +// return instruction that does not pop values of the stack +// jr RA +// delay slot +class NativeReturn: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_size = 8, + instruction_offset = 0, + next_instruction_offset = 8 + }; +}; + + + + +class NativeCondJump; +inline NativeCondJump* nativeCondJump_at(address address); +class NativeCondJump: public NativeInstruction { + public: + enum mips_specific_constants { + instruction_size = 16, + instruction_offset = 12, + next_instruction_offset = 20 + }; + + + int insn_word() const { return long_at(instruction_offset); } + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const { return addr_at(next_instruction_offset); } + + // Creation + inline friend NativeCondJump* nativeCondJump_at(address address); + + address jump_destination() const { + return ::nativeCondJump_at(addr_at(12))->jump_destination(); + } + + void set_jump_destination(address dest) { + ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); + } + +}; + +inline NativeCondJump* nativeCondJump_at(address address) { + NativeCondJump* jump = (NativeCondJump*)(address); + return jump; +} + + + +inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } + +inline bool NativeInstruction::is_call() { + // jal target + // nop + if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && + nativeInstruction_at(addr_at(4))->is_nop() ) { + return true; + } + + // nop + // nop + // nop + // nop + // jal target + // nop + if ( is_nop() && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return true; + } + + // li64 + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::dsll_op) && + is_op(int_at(20), Assembler::ori_op) && + is_special_op(int_at(24), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op (int_at(12), Assembler::ori_op) && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //ori dst, dst, imm16 + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + is_op 
(int_at(8), Assembler::ori_op) && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //ori dst, R0, imm16 + //dsll dst, dst, 16 + //nop + //nop + if ( is_op(Assembler::ori_op) && + is_special_op(int_at(4), Assembler::dsll_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //daddiu dst, R0, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + //nop + //nop + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //nop + //nop + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + nativeInstruction_at(addr_at(8))->is_nop() && + nativeInstruction_at(addr_at(12))->is_nop() && + is_special_op(int_at(16), Assembler::jalr_op) ) { + return true; + } + + + //daddiu dst, R0, imm16 + //nop + if ( is_op(Assembler::daddiu_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //ori dst, dst, imm16 + if ( is_op(Assembler::lui_op) && + is_op (int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return true; + } + + //lui dst, imm16 + //nop + if ( is_op(Assembler::lui_op) && + nativeInstruction_at(addr_at(4))->is_nop() && + is_special_op(int_at(8), Assembler::jalr_op) ) { + return true; + } + + if(is_trampoline_call()) + return true; + + return false; + +} + +inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} + +inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } + +// Call trampoline stubs. 
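+//
+// The stub is only two instruction words long and holds the 64-bit destination
+// address as data at its start: destination() reads ptr_at(0) and set_destination()
+// rewrites it. A far call reaches that destination through the lui/ori/dsll/ld/jalr
+// sequence recognized by is_trampoline_call() below, so retargeting such a call
+// only requires patching the stub's data word, not the call site itself.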
+class NativeCallTrampolineStub : public NativeInstruction { + public: + + enum mips_specific_constants { + instruction_size = 2 * BytesPerInstWord, + instruction_offset = 0, + next_instruction_offset = 2 * BytesPerInstWord + }; + + address destination() const { + return (address)ptr_at(0); + } + + void set_destination(address new_destination) { + set_ptr_at(0, (intptr_t)new_destination); + } +}; + +inline bool NativeInstruction::is_trampoline_call() { + // lui dst, imm16 + // ori dst, dst, imm16 + // dsll dst, dst, 16 + // ld target, dst, imm16 + // jalr target + // nop + if ( is_op(Assembler::lui_op) && + is_op(int_at(4), Assembler::ori_op) && + is_special_op(int_at(8), Assembler::dsll_op) && + is_op(int_at(12), Assembler::ld_op) && + is_special_op(int_at(16), Assembler::jalr_op) && + nativeInstruction_at(addr_at(20))->is_nop() ) { + return true; + } + + return false; +} + +inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { + return (NativeCallTrampolineStub*)addr; +} + +#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/registerMap_mips.hpp b/hotspot/src/cpu/mips/vm/registerMap_mips.hpp new file mode 100644 index 00000000000..7f800eb1070 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/registerMap_mips.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP + +// machine-dependent implemention for register maps + friend class frame; + + private: +#ifndef CORE + // This is the hook for finding a register in an "well-known" location, + // such as a register block of a predetermined format. + // Since there is none, we just return NULL. + // See registerMap_sparc.hpp for an example of grabbing registers + // from register save areas of a standard layout. + address pd_location(VMReg reg) const {return NULL;} +#endif + + // no PD state to clear or copy: + void pd_clear() {} + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp b/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp new file mode 100644 index 00000000000..4af25318346 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/register_definitions_mips.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/register.hpp" +#include "register_mips.hpp" +#ifdef TARGET_ARCH_MODEL_mips_32 +# include "interp_masm_mips_32.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_mips_64 +# include "interp_masm_mips_64.hpp" +#endif + +REGISTER_DEFINITION(Register, noreg); +REGISTER_DEFINITION(Register, i0); +REGISTER_DEFINITION(Register, i1); +REGISTER_DEFINITION(Register, i2); +REGISTER_DEFINITION(Register, i3); +REGISTER_DEFINITION(Register, i4); +REGISTER_DEFINITION(Register, i5); +REGISTER_DEFINITION(Register, i6); +REGISTER_DEFINITION(Register, i7); +REGISTER_DEFINITION(Register, i8); +REGISTER_DEFINITION(Register, i9); +REGISTER_DEFINITION(Register, i10); +REGISTER_DEFINITION(Register, i11); +REGISTER_DEFINITION(Register, i12); +REGISTER_DEFINITION(Register, i13); +REGISTER_DEFINITION(Register, i14); +REGISTER_DEFINITION(Register, i15); +REGISTER_DEFINITION(Register, i16); +REGISTER_DEFINITION(Register, i17); +REGISTER_DEFINITION(Register, i18); +REGISTER_DEFINITION(Register, i19); +REGISTER_DEFINITION(Register, i20); +REGISTER_DEFINITION(Register, i21); +REGISTER_DEFINITION(Register, i22); +REGISTER_DEFINITION(Register, i23); +REGISTER_DEFINITION(Register, i24); +REGISTER_DEFINITION(Register, i25); +REGISTER_DEFINITION(Register, i26); +REGISTER_DEFINITION(Register, i27); +REGISTER_DEFINITION(Register, i28); +REGISTER_DEFINITION(Register, i29); +REGISTER_DEFINITION(Register, i30); +REGISTER_DEFINITION(Register, i31); + +REGISTER_DEFINITION(FloatRegister, fnoreg); +REGISTER_DEFINITION(FloatRegister, f0); +REGISTER_DEFINITION(FloatRegister, f1); +REGISTER_DEFINITION(FloatRegister, f2); +REGISTER_DEFINITION(FloatRegister, f3); +REGISTER_DEFINITION(FloatRegister, f4); +REGISTER_DEFINITION(FloatRegister, f5); +REGISTER_DEFINITION(FloatRegister, f6); +REGISTER_DEFINITION(FloatRegister, f7); +REGISTER_DEFINITION(FloatRegister, f8); +REGISTER_DEFINITION(FloatRegister, f9); +REGISTER_DEFINITION(FloatRegister, f10); +REGISTER_DEFINITION(FloatRegister, f11); +REGISTER_DEFINITION(FloatRegister, f12); +REGISTER_DEFINITION(FloatRegister, f13); +REGISTER_DEFINITION(FloatRegister, f14); +REGISTER_DEFINITION(FloatRegister, f15); +REGISTER_DEFINITION(FloatRegister, f16); +REGISTER_DEFINITION(FloatRegister, f17); +REGISTER_DEFINITION(FloatRegister, f18); +REGISTER_DEFINITION(FloatRegister, f19); +REGISTER_DEFINITION(FloatRegister, f20); +REGISTER_DEFINITION(FloatRegister, f21); 
+REGISTER_DEFINITION(FloatRegister, f22); +REGISTER_DEFINITION(FloatRegister, f23); +REGISTER_DEFINITION(FloatRegister, f24); +REGISTER_DEFINITION(FloatRegister, f25); +REGISTER_DEFINITION(FloatRegister, f26); +REGISTER_DEFINITION(FloatRegister, f27); +REGISTER_DEFINITION(FloatRegister, f28); +REGISTER_DEFINITION(FloatRegister, f29); +REGISTER_DEFINITION(FloatRegister, f30); +REGISTER_DEFINITION(FloatRegister, f31); diff --git a/hotspot/src/cpu/mips/vm/register_mips.cpp b/hotspot/src/cpu/mips/vm/register_mips.cpp new file mode 100644 index 00000000000..4a9b22bfef2 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/register_mips.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "register_mips.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + + 2 * FloatRegisterImpl::number_of_registers; + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", + "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", + }; + return is_valid() ? names[encoding()] : "fnoreg"; +} + diff --git a/hotspot/src/cpu/mips/vm/register_mips.hpp b/hotspot/src/cpu/mips/vm/register_mips.hpp new file mode 100644 index 00000000000..88bf2d68cc9 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/register_mips.hpp @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP +#define CPU_MIPS_VM_REGISTER_MIPS_HPP + +#include "asm/register.hpp" +#include "vm_version_mips.hpp" + +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// Use Register as shortcut +class RegisterImpl; +typedef RegisterImpl* Register; + + +// The implementation of integer registers for the mips architecture +inline Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +class RegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 32 + }; + + // derived registers, offsets, and addresses + Register successor() const { return as_Register(encoding() + 1); } + + // construction + inline friend Register as_Register(int encoding); + + VMReg as_VMReg(); + + // accessors + int encoding() const { assert(is_valid(),err_msg( "invalid register (%d)", (int)(intptr_t)this)); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; +}; + + +// The integer registers of the MIPS32 architecture +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + +CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); +CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); +CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); +CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); +CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); +CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); +CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); +CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); +CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); +CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); +CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); +CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); +CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); +CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); +CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); +CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); +CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); +CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); +CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); +CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); +CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); +CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); +CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); +CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); +CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); +CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); +CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); +CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); +CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); +CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); +CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); +CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); + 
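+// The generic names i0..i31 follow the hardware encoding; they correspond to the
+// MIPS ABI names zero, at, v0, v1, a0..a7, t0..t3, s0..s7, t8, t9, k0, k1, gp, sp,
+// fp (s8), ra listed in register_mips.cpp, and the aliases defined below
+// (R0, AT, V0, V1, RA0..RA7, RT0..RT3, S0..S7, RT8, RT9, K0, K1, GP, SP, FP, RA)
+// follow the same order.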
+#ifndef DONT_USE_REGISTER_DEFINES +#define NOREG ((Register)(noreg_RegisterEnumValue)) + +#define I0 ((Register)(i0_RegisterEnumValue)) +#define I1 ((Register)(i1_RegisterEnumValue)) +#define I2 ((Register)(i2_RegisterEnumValue)) +#define I3 ((Register)(i3_RegisterEnumValue)) +#define I4 ((Register)(i4_RegisterEnumValue)) +#define I5 ((Register)(i5_RegisterEnumValue)) +#define I6 ((Register)(i6_RegisterEnumValue)) +#define I7 ((Register)(i7_RegisterEnumValue)) +#define I8 ((Register)(i8_RegisterEnumValue)) +#define I9 ((Register)(i9_RegisterEnumValue)) +#define I10 ((Register)(i10_RegisterEnumValue)) +#define I11 ((Register)(i11_RegisterEnumValue)) +#define I12 ((Register)(i12_RegisterEnumValue)) +#define I13 ((Register)(i13_RegisterEnumValue)) +#define I14 ((Register)(i14_RegisterEnumValue)) +#define I15 ((Register)(i15_RegisterEnumValue)) +#define I16 ((Register)(i16_RegisterEnumValue)) +#define I17 ((Register)(i17_RegisterEnumValue)) +#define I18 ((Register)(i18_RegisterEnumValue)) +#define I19 ((Register)(i19_RegisterEnumValue)) +#define I20 ((Register)(i20_RegisterEnumValue)) +#define I21 ((Register)(i21_RegisterEnumValue)) +#define I22 ((Register)(i22_RegisterEnumValue)) +#define I23 ((Register)(i23_RegisterEnumValue)) +#define I24 ((Register)(i24_RegisterEnumValue)) +#define I25 ((Register)(i25_RegisterEnumValue)) +#define I26 ((Register)(i26_RegisterEnumValue)) +#define I27 ((Register)(i27_RegisterEnumValue)) +#define I28 ((Register)(i28_RegisterEnumValue)) +#define I29 ((Register)(i29_RegisterEnumValue)) +#define I30 ((Register)(i30_RegisterEnumValue)) +#define I31 ((Register)(i31_RegisterEnumValue)) + +#define R0 ((Register)(i0_RegisterEnumValue)) +#define AT ((Register)(i1_RegisterEnumValue)) +#define V0 ((Register)(i2_RegisterEnumValue)) +#define V1 ((Register)(i3_RegisterEnumValue)) +#define RA0 ((Register)(i4_RegisterEnumValue)) +#define RA1 ((Register)(i5_RegisterEnumValue)) +#define RA2 ((Register)(i6_RegisterEnumValue)) +#define RA3 ((Register)(i7_RegisterEnumValue)) +#define RA4 ((Register)(i8_RegisterEnumValue)) +#define RA5 ((Register)(i9_RegisterEnumValue)) +#define RA6 ((Register)(i10_RegisterEnumValue)) +#define RA7 ((Register)(i11_RegisterEnumValue)) +#define RT0 ((Register)(i12_RegisterEnumValue)) +#define RT1 ((Register)(i13_RegisterEnumValue)) +#define RT2 ((Register)(i14_RegisterEnumValue)) +#define RT3 ((Register)(i15_RegisterEnumValue)) +#define S0 ((Register)(i16_RegisterEnumValue)) +#define S1 ((Register)(i17_RegisterEnumValue)) +#define S2 ((Register)(i18_RegisterEnumValue)) +#define S3 ((Register)(i19_RegisterEnumValue)) +#define S4 ((Register)(i20_RegisterEnumValue)) +#define S5 ((Register)(i21_RegisterEnumValue)) +#define S6 ((Register)(i22_RegisterEnumValue)) +#define S7 ((Register)(i23_RegisterEnumValue)) +#define RT8 ((Register)(i24_RegisterEnumValue)) +#define RT9 ((Register)(i25_RegisterEnumValue)) +#define K0 ((Register)(i26_RegisterEnumValue)) +#define K1 ((Register)(i27_RegisterEnumValue)) +#define GP ((Register)(i28_RegisterEnumValue)) +#define SP ((Register)(i29_RegisterEnumValue)) +#define FP ((Register)(i30_RegisterEnumValue)) +#define S8 ((Register)(i30_RegisterEnumValue)) +#define RA ((Register)(i31_RegisterEnumValue)) + +#define c_rarg0 RT0 +#define c_rarg1 RT1 +#define Rmethod S3 +#define Rsender S4 +#define Rnext S1 + +/* +#define RT0 T0 +#define RT1 T1 +#define RT2 T2 +#define RT3 T3 +#define RT4 T8 +#define RT5 T9 +*/ + + +//for interpreter frame +// bytecode pointer register +#define BCP S0 +// local variable pointer register 
+#define LVP S7 +// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM +// be sure to save and restore its value in call_stub +#define TSR S2 + +//OPT_SAFEPOINT not supported yet +#define OPT_SAFEPOINT 1 + +#define OPT_THREAD 1 + +#define TREG S6 + +#define S5_heapbase S5 + +#define mh_SP_save SP + +#define FSR V0 +#define SSR V1 +#define FSF F0 +#define SSF F1 +#define FTF F14 +#define STF F15 + +#define AFT F30 + +#define RECEIVER T0 +#define IC_Klass T1 + +#define SHIFT_count T3 + +#endif // DONT_USE_REGISTER_DEFINES + +// Use FloatRegister as shortcut +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t) encoding; +} + +// The implementation of floating point registers for the mips architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { + float_arg_base = 12, + number_of_registers = 32 + }; + + // construction + inline friend FloatRegister as_FloatRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; + +}; + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); + +#ifndef DONT_USE_REGISTER_DEFINES +#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) +#define F0 ((FloatRegister)( 
f0_FloatRegisterEnumValue)) +#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) +#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) +#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) +#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) +#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) +#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) +#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) +#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) +#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) +#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) +#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) +#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) +#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) +#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) +#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) +#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) +#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) +#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) +#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) +#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) +#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) +#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) +#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) +#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) +#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) +#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) +#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) +#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) +#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) +#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) +#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) +#endif // DONT_USE_REGISTER_DEFINES + + +const int MIPS_ARGS_IN_REGS_NUM = 4; + +// Need to know the total number of registers of all sorts for SharedInfo. +// Define a class that exports it. +class ConcreteRegisterImpl : public AbstractRegisterImpl { + public: + enum { + // A big enough number for C2: all the registers plus flags + // This number must be large enough to cover REG_COUNT (defined by c2) registers. + // There is no requirement that any ordering here matches any ordering c2 gives + // it's optoregs. + number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 + }; + + static const int max_gpr; + static const int max_fpr; +}; + +#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp b/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp new file mode 100644 index 00000000000..cae43b2d96b --- /dev/null +++ b/hotspot/src/cpu/mips/vm/relocInfo_mips.cpp @@ -0,0 +1,156 @@ +/* + * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/relocInfo.hpp" +#include "compiler/disassembler.hpp" +#include "nativeInst_mips.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/safepoint.hpp" + + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + x += o; + typedef Assembler::WhichOperand WhichOperand; + WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop + assert(which == Assembler::disp32_operand || + which == Assembler::narrow_oop_operand || + which == Assembler::imm_operand, "format unpacks ok"); + if (which == Assembler::imm_operand) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); + } + } else if (which == Assembler::narrow_oop_operand) { + // both compressed oops and compressed classes look the same + if (Universe::heap()->is_in_reserved((oop)x)) { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)oopDesc::encode_heap_oop((oop)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(oopDesc::encode_heap_oop((oop)x)), (intptr_t)(x)); + } + } else { + if (verify_only) { + assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); + } else { + nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); + } + } + } else { + // Note: Use runtime_call_type relocations for call32_operand. 
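+    // A call32_operand would imply a pc-relative 32-bit displacement. This port
+    // appears to materialize destinations as absolute immediates instead
+    // (NativeMovConstReg / patchable_set48), so that format never reaches this
+    // point and is trapped below.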
+ assert(0, "call32_operand not supported in MIPS64"); + } +} + + +//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, +//Maybe We should FORGET CALL RELOCATION +address Relocation::pd_call_destination(address orig_addr) { + intptr_t adj = 0; + NativeInstruction* ni = nativeInstruction_at(addr()); + if (ni->is_call()) { + if (!ni->is_trampoline_call()) { + return nativeCall_at(addr())->target_addr_for_insn(); + } else { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + return nativeCallTrampolineStub_at(trampoline)->destination(); + } else { + return (address) -1; + } + } + } else if (ni->is_jump()) { + return nativeGeneralJump_at(addr())->jump_destination() + adj; + } else if (ni->is_cond_jump()) { + return nativeCondJump_at(addr())->jump_destination() +adj; + } else { + tty->print_cr("\nError!\ncall destination: 0x%lx", p2i(addr())); + Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); + ShouldNotReachHere(); + return NULL; + } +} + + +void Relocation::pd_set_call_destination(address x) { + NativeInstruction* ni = nativeInstruction_at(addr()); + if (ni->is_call()) { + NativeCall* call = nativeCall_at(addr()); + if (!ni->is_trampoline_call()) { + call->set_destination(x); + } else { + address trampoline_stub_addr = call->get_trampoline(); + if (trampoline_stub_addr != NULL) { + address orig = call->target_addr_for_insn(); + if (orig != trampoline_stub_addr) { + call->patch_on_trampoline(trampoline_stub_addr); + } + call->set_destination_mt_safe(x, false); + } + } + } else if (ni->is_jump()) + nativeGeneralJump_at(addr())->set_jump_destination(x); + else if (ni->is_cond_jump()) + nativeCondJump_at(addr())->set_jump_destination(x); + else + { ShouldNotReachHere(); } + + // Unresolved jumps are recognized by a destination of -1 + // However 64bit can't actually produce such an address + // and encodes a jump to self but jump_destination will + // return a -1 as the signal. We must not relocate this + // jmp or the ic code will not see it as unresolved. +} + + +address* Relocation::pd_address_in_code() { + return (address*)addr(); +} + + +address Relocation::pd_get_address_from_code() { + NativeMovConstReg* ni = nativeMovConstReg_at(addr()); + return (address)ni->data(); +} + + + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + +void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { + address target =0; + NativeMovConstReg* ni = nativeMovConstReg_at(addr()); + target = new_addr_for((address)ni->data(), src, dest); + ni->set_data((intptr_t)target); +} + +void metadata_Relocation::pd_fix_value(address x) { +} diff --git a/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp b/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp new file mode 100644 index 00000000000..04ad5dac96c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/relocInfo_mips.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP +#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Since MIPS instructions are whole words, + // the two low-order offset bits can always be discarded. + offset_unit = 4, + + // imm_oop_operand vs. narrow_oop_operand + format_width = 2 + }; + +#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp b/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp new file mode 100644 index 00000000000..bb9269b423f --- /dev/null +++ b/hotspot/src/cpu/mips/vm/runtime_mips_64.cpp @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "classfile/systemDictionary.hpp" +#include "code/vmreg.hpp" +#include "interpreter/interpreter.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_mips.inline.hpp" +#endif + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +//-------------- generate_exception_blob ----------- +// creates _exception_blob. +// The exception blob is jumped to from a compiled method. 
+// (see emit_exception_handler in sparc.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jump, and left with a jump. +// +// Arguments: +// V0: exception oop +// V1: exception pc +// +// Results: +// A0: exception oop +// A1: exception pc in caller or ??? +// jumps to: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// +// [stubGenerator_mips.cpp] generate_forward_exception() +// |- V0, V1 are created +// |- T9 <= SharedRuntime::exception_handler_for_return_address +// `- jr T9 +// `- the caller's exception_handler +// `- jr OptoRuntime::exception_blob +// `- here +// +void OptoRuntime::generate_exception_blob() { + // Capture info about frame layout + enum layout { + fp_off, + return_off, // slot for return address + framesize + }; + + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer("exception_blob", 5120, 5120); + MacroAssembler* masm = new MacroAssembler(&buffer); + + + address start = __ pc(); + + __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! + + // this frame will be treated as the original caller method. + // So, the return pc should be filled with the original exception pc. + // ref: X86's implementation + __ sd(V1, SP, return_off *wordSize); // return address + __ sd(FP, SP, fp_off *wordSize); + + // Save callee saved registers. None for UseSSE=0, + // floats-only for UseSSE=1, and doubles for UseSSE=2. + + __ daddiu(FP, SP, fp_off * wordSize); + + // Store exception in Thread object. We cannot pass any arguments to the + // handle_exception call, since we do not want to make any assumption + // about the size of the frame where the exception happened in. + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); + __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); + + // This call does all the hard work. It checks if an exception handler + // exists in the method. + // If so, it returns the handler address. + // If not, it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. + __ set_last_Java_frame(thread, NOREG, NOREG, NULL); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ relocate(relocInfo::internal_pc_type); + + { + long save_pc = (long)__ pc() + 48; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); + + __ move(A0, thread); + __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); + __ jalr(T9); + __ delayed()->nop(); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap( framesize, 0 ); + + oop_maps->add_gc_map( __ offset(), map); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(thread, true); + + // Pop self-frame. + __ leave(); // Epilog! 
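+  // handle_exception_C (called above) left its result in V0. The code below
+  // moves it to T9, reloads the exception oop/pc from the thread, clears the
+  // oop as a GC root, and jumps with A0/V0 = exception oop and A1/V1 =
+  // exception pc, matching the contract in the header comment.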
+ + // V0: exception handler + + // We have a handler in V0, (could be deopt blob) + __ move(T9, V0); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // Get the exception + __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); + // Get the exception pc in case we are deoptimized + __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); +#ifdef ASSERT + __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); + __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); +#endif + // Clear the exception oop so GC no longer processes it as a root. + __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); + + // Fix seg fault when running: + // Eclipse + Plugin + Debug As + // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() + // + __ move(V0, A0); + __ move(V1, A1); + + // V0: exception oop + // T9: exception handler + // A1: exception pc + __ jr(T9); + __ delayed()->nop(); + + // make sure all code is generated + masm->flush(); + + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); +} diff --git a/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp b/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp new file mode 100644 index 00000000000..daf04c44229 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/sharedRuntime_mips_64.cpp @@ -0,0 +1,3816 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/compiledICHolder.hpp" +#include "prims/jvmtiRedefineClassesTrace.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_mips.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +#include + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + +class RegisterSaver { + enum { FPU_regs_live = 32 }; + // Capture info about frame layout + enum layout { +#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, + DEF_LAYOUT_OFFS(for_16_bytes_aligned) + DEF_LAYOUT_OFFS(fpr0) + DEF_LAYOUT_OFFS(fpr1) + DEF_LAYOUT_OFFS(fpr2) + DEF_LAYOUT_OFFS(fpr3) + DEF_LAYOUT_OFFS(fpr4) + DEF_LAYOUT_OFFS(fpr5) + DEF_LAYOUT_OFFS(fpr6) + DEF_LAYOUT_OFFS(fpr7) + DEF_LAYOUT_OFFS(fpr8) + DEF_LAYOUT_OFFS(fpr9) + DEF_LAYOUT_OFFS(fpr10) + DEF_LAYOUT_OFFS(fpr11) + DEF_LAYOUT_OFFS(fpr12) + DEF_LAYOUT_OFFS(fpr13) + DEF_LAYOUT_OFFS(fpr14) + DEF_LAYOUT_OFFS(fpr15) + DEF_LAYOUT_OFFS(fpr16) + DEF_LAYOUT_OFFS(fpr17) + DEF_LAYOUT_OFFS(fpr18) + DEF_LAYOUT_OFFS(fpr19) + DEF_LAYOUT_OFFS(fpr20) + DEF_LAYOUT_OFFS(fpr21) + DEF_LAYOUT_OFFS(fpr22) + DEF_LAYOUT_OFFS(fpr23) + DEF_LAYOUT_OFFS(fpr24) + DEF_LAYOUT_OFFS(fpr25) + DEF_LAYOUT_OFFS(fpr26) + DEF_LAYOUT_OFFS(fpr27) + DEF_LAYOUT_OFFS(fpr28) + DEF_LAYOUT_OFFS(fpr29) + DEF_LAYOUT_OFFS(fpr30) + DEF_LAYOUT_OFFS(fpr31) + + DEF_LAYOUT_OFFS(v0) + DEF_LAYOUT_OFFS(v1) + DEF_LAYOUT_OFFS(a0) + DEF_LAYOUT_OFFS(a1) + DEF_LAYOUT_OFFS(a2) + DEF_LAYOUT_OFFS(a3) + DEF_LAYOUT_OFFS(a4) + DEF_LAYOUT_OFFS(a5) + DEF_LAYOUT_OFFS(a6) + DEF_LAYOUT_OFFS(a7) + DEF_LAYOUT_OFFS(t0) + DEF_LAYOUT_OFFS(t1) + DEF_LAYOUT_OFFS(t2) + DEF_LAYOUT_OFFS(t3) + DEF_LAYOUT_OFFS(s0) + DEF_LAYOUT_OFFS(s1) + DEF_LAYOUT_OFFS(s2) + DEF_LAYOUT_OFFS(s3) + DEF_LAYOUT_OFFS(s4) + DEF_LAYOUT_OFFS(s5) + DEF_LAYOUT_OFFS(s6) + DEF_LAYOUT_OFFS(s7) + DEF_LAYOUT_OFFS(t8) + DEF_LAYOUT_OFFS(t9) + + DEF_LAYOUT_OFFS(gp) + DEF_LAYOUT_OFFS(fp) + DEF_LAYOUT_OFFS(return) + reg_save_size + }; + + public: + + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); + static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); + static int raOffset(void) { return return_off / 2; } + //Rmethod + static int methodOffset(void) { return s3_off / 2; } + + static int v0Offset(void) { return v0_off / 2; } + static int v1Offset(void) { return v1_off / 2; } + + static int fpResultOffset(void) { return fpr0_off / 2; } + + // During deoptimization only the result register need to be restored + // all the other values have already been extracted. 
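+  // (Each DEF_LAYOUT_OFFS entry above defines a pair of 32-bit slots per
+  // register, which is why the word-sized accessors divide the *_off values
+  // by 2.)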
+ static void restore_result_registers(MacroAssembler* masm); +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { + + // Always make the frame size 16-byte aligned + int frame_size_in_bytes = round_to(additional_frame_words*wordSize + + reg_save_size*BytesPerInt, 16); + // OopMap frame size is in compiler stack slots (jint's) not bytes or words + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; + // The caller will allocate additional_frame_words + int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; + // CodeBlob frame size is in words. + int frame_size_in_words = frame_size_in_bytes / wordSize; + *total_frame_words = frame_size_in_words; + + // save registers + + __ daddiu(SP, SP, - reg_save_size * jintSize); + + __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); + __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); + __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); + __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); + __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); + __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); + __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); + __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); + __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); + __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); + __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); + __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); + __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); + __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); + __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); + __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); + __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); + __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); + __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); + __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); + __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); + __ sd(T0, SP, t0_off * jintSize); + __ sd(T1, SP, t1_off * jintSize); + __ sd(T2, SP, t2_off * jintSize); + __ sd(T3, SP, t3_off * jintSize); + __ sd(S0, SP, s0_off * jintSize); + __ sd(S1, SP, s1_off * jintSize); + __ sd(S2, SP, s2_off * jintSize); + __ sd(S3, SP, s3_off * jintSize); + __ sd(S4, SP, s4_off * jintSize); + __ sd(S5, SP, s5_off * jintSize); + __ sd(S6, SP, s6_off * jintSize); + __ sd(S7, SP, s7_off * jintSize); + + __ sd(T8, SP, t8_off * jintSize); + __ sd(T9, SP, t9_off * jintSize); + + __ sd(GP, SP, gp_off * jintSize); + __ sd(FP, SP, fp_off * jintSize); + __ sd(RA, SP, return_off * jintSize); + __ daddiu(FP, SP, fp_off * jintSize); + + OopMapSet *oop_maps = new OopMapSet(); + //OopMap* map = new OopMap( frame_words, 0 ); + OopMap* map = new OopMap( frame_size_in_slots, 0 ); + + +//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) +#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) + map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); + 
map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); + + map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); + 
map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); + map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); + +#undef STACK_OFFSET + return map; +} + + +// Pop the current frame and restore all the registers that we +// saved. +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { + __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); + __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); + __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); + __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); + __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); + __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); + __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); + __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); + __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); + __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); + __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); + __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); + __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); + __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); + __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); + __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); + + __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); + __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); + __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); + __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); + __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); + __ ld(T0, SP, t0_off * jintSize); + __ ld(T1, SP, t1_off * jintSize); + __ ld(T2, SP, t2_off * jintSize); + __ ld(T3, SP, t3_off * jintSize); + __ ld(S0, SP, s0_off * jintSize); + __ ld(S1, SP, s1_off * jintSize); + __ ld(S2, SP, s2_off * jintSize); + __ ld(S3, SP, s3_off * jintSize); + __ ld(S4, SP, s4_off * jintSize); + __ ld(S5, SP, s5_off * jintSize); + __ ld(S6, SP, s6_off * jintSize); + __ ld(S7, SP, s7_off * jintSize); + + __ ld(T8, SP, t8_off * jintSize); + __ ld(T9, SP, t9_off * jintSize); + + __ ld(GP, SP, gp_off * jintSize); + __ ld(FP, SP, fp_off * jintSize); + __ ld(RA, SP, return_off * jintSize); + + __ addiu(SP, SP, reg_save_size * jintSize); +} + +// Pop the current frame and restore the registers that might be holding +// a result. +void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + + // Just restore result register. Only used by deoptimization. By + // now any callee save register that needs to be restore to a c2 + // caller of the deoptee has been extracted into the vframeArray + // and will be stuffed into the c2i adapter we create for later + // restoration so only result registers need to be restored here. 
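+  // V0/V1 (FSR/SSR in the shorthand defines) and F0/F1 (FSF/SSF) are the only
+  // registers that can carry a return value, so they are all that is reloaded: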
+ + __ ld(V0, SP, v0_off * jintSize); + __ ld(V1, SP, v1_off * jintSize); + __ ldc1(F0, SP, fpr0_off * jintSize); + __ ldc1(F1, SP, fpr1_off * jintSize); + __ addiu(SP, SP, return_off * jintSize); +} + +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} + +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. + +static int reg2offset_in(VMReg r) { + // Account for saved fp and return address + // This should really be in_preserve_stack_slots + return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); +} + +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than SharedInfo::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register +// up to RegisterImpl::number_of_registers) are the 32-bit +// integer registers. + +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +// Note: the INPUTS in sig_bt are in units of Java argument words, which are +// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit +// units regardless of build. + + +// --------------------------------------------------------------------------- +// The compiled Java calling convention. +// Pass first five oop/int args in registers T0, A0 - A3. +// Pass float/double/long args in stack. +// Doubles have precedence, so if you pass a mix of floats and doubles +// the doubles will grab the registers before the floats will. + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + + // Create the mapping between argument positions and registers. 
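+  // Worked example (illustrative): for an instance method
+  //   int m(int i, long l, double d, float f)
+  // sig_bt is { OBJECT, INT, LONG, VOID, DOUBLE, VOID, FLOAT } and, because a
+  // single 'args' counter is shared between the integer and float banks below,
+  // the mapping is receiver -> T0, i -> A0, l -> A1, d -> F15 (not F12),
+  // f -> F16. Overflow arguments take two 32-bit stack slots each.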
+ static const Register INT_ArgReg[Argument::n_register_parameters] = { + T0, A0, A1, A2, A3, A4, A5, A6 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + F12, F13, F14, F15, F16, F17, F18, F19 + }; + + uint args = 0; + uint stk_args = 0; // inc by 2 each time + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { + Label L; + __ verify_oop(Rmethod); + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + // Schedule the branch target address early. + // Call into the VM to patch the caller, then jump to compiled callee + // V0 isn't live so capture return address while we easily can + __ move(V0, RA); + + __ pushad(); +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // VM needs caller's callsite + // VM needs target method + + __ move(A0, Rmethod); + __ move(A1, V0); + // we should preserve the return address + __ verify_oop(Rmethod); + __ move(S0, SP); + __ move(AT, -(StackAlignmentInBytes)); // align the stack + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), + relocInfo::runtime_call_type); + + __ delayed()->nop(); + __ move(SP, S0); + __ popad(); + __ bind(L); +} + +static void gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup) { + + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. + // However we will run interpreted if we come thru here. The next pass + // thru the call site will run compiled. If we ran compiled here then + // we can (theorectically) do endless i2c->c2i->i2c transitions during + // deopt/uncommon trap cycles. 
If we always go interpreted here then + // we can have at most one and don't need to play any tricks to keep + // from endlessly growing the stack. + // + // Actually if we detected that we had an i2c->c2i transition here we + // ought to be able to reset the world back to the state of the interpreted + // call and not bother building another interpreter arg area. We don't + // do that at this point. + + patch_callers_callsite(masm); + __ bind(skip_fixup); + +#ifdef COMPILER2 + __ empty_FPU_stack(); +#endif + //this is for native ? + // Since all args are passed on the stack, total_args_passed * interpreter_ + // stack_element_size is the + // space we need. + int extraspace = total_args_passed * Interpreter::stackElementSize; + + // stack is aligned, keep it that way + extraspace = round_to(extraspace, 2*wordSize); + + // Get return address + __ move(V0, RA); + // set senderSP value + //refer to interpreter_mips.cpp:generate_asm_entry + __ move(Rsender, SP); + __ addiu(SP, SP, -extraspace); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // st_off points to lowest address on stack. + int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; + // Say 4 args: + // i st_off + // 0 12 T_LONG + // 1 8 T_VOID + // 2 4 T_OBJECT + // 3 0 T_BOOL + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // memory to memory use fpu stack top + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + __ ld_ptr(AT, SP, ld_off); + __ st_ptr(AT, SP, st_off); + + } else { + + + int next_off = st_off - Interpreter::stackElementSize; + __ ld_ptr(AT, SP, ld_off); + __ st_ptr(AT, SP, st_off); + + // Ref to is_Register condition + if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ st_ptr(AT, SP, st_off - 8); + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + __ sd(r, SP, st_off); + } else { + //FIXME, mips will not enter here + // long/double in gpr + __ sd(r, SP, st_off); + // In [java/util/zip/ZipFile.java] + // + // private static native long open(String name, int mode, long lastModified); + // private static native int getTotal(long jzfile); + // + // We need to transfer T_LONG paramenters from a compiled method to a native method. + // It's a complex process: + // + // Caller -> lir_static_call -> gen_resolve_stub + // -> -- resolve_static_call_C + // `- gen_c2i_adapter() [*] + // | + // `- AdapterHandlerLibrary::get_create_apapter_index + // -> generate_native_entry + // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] + // + // In [**], T_Long parameter is stored in stack as: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // However, the sequence is reversed here: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | | + // (low) + // + // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). 
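+        // In short: the 64-bit value is written to both the value slot and the
+        // adjacent (T_VOID) slot, so the interpreter side finds a valid copy
+        // whichever of the two layouts it assumes.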
+ // + if (sig_bt[i] == T_LONG) + __ sd(r, SP, st_off - 8); + } + } else if (r_1->is_FloatRegister()) { + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ swc1(fr, SP, st_off); + else { + __ sdc1(fr, SP, st_off); + __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots + } + } + } + + // Schedule the branch target address early. + __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); + // And repush original return address + __ move(RA, V0); + __ jr (AT); + __ delayed()->nop(); +} + +static void gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + + // Generate an I2C adapter: adjust the I-frame to make space for the C-frame + // layout. Lesp was saved by the calling I-frame and will be restored on + // return. Meanwhile, outgoing arg space is all owned by the callee + // C-frame, so we can mangle it at will. After adjusting the frame size, + // hoist register arguments and repack other args according to the compiled + // code convention. Finally, end in a jump to the compiled code. The entry + // point address is the start of the buffer. + + // We will only enter here from an interpreted frame and never from after + // passing thru a c2i. Azul allowed this but we do not. If we lose the + // race and use a c2i we will remain interpreted for the race loser(s). + // This removes all sorts of headaches on the mips side and also eliminates + // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. + + + __ move(T9, SP); + + // Cut-out for having no stack args. Since up to 2 int/oop args are passed + // in registers, we will occasionally have no stack args. + int comp_words_on_stack = 0; + if (comp_args_on_stack) { + // Sig words on the stack are greater-than VMRegImpl::stack0. Those in + // registers are below. By subtracting stack0, we either get a negative + // number (all values in registers) or the maximum stack slot accessed. + // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); + // Convert 4-byte stack slots to words. + comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; + // Round up to miminum stack alignment, in wordSize + comp_words_on_stack = round_to(comp_words_on_stack, 2); + __ daddiu(SP, SP, -comp_words_on_stack * wordSize); + } + + // Align the outgoing SP + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + // push the return address on the stack (note that pushing, rather + // than storing it, yields the correct frame alignment for the callee) + // Put saved SP in another register + const Register saved_sp = V0; + __ move(saved_sp, T9); + + + // Will jump to the compiled code just as if compiled code was doing it. + // Pre-load the register-jump target early, to schedule it better. + __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); + + // Now generate the shuffle code. Pick up all register args and move the + // rest through the floating point stack top. + for (int i = 0; i < total_args_passed; i++) { + if (sig_bt[i] == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from SP+offset. 
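+    // ld_off indexes the interpreter argument area through saved_sp (the
+    // interpreter caller's SP, captured via T9 at the top of this adapter);
+    // st_off is a slot in the compiled outgoing area carved out above by
+    // lowering and realigning SP.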
+ + //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; + // Point to interpreter value (vs. tag) + int next_off = ld_off - Interpreter::stackElementSize; + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to + // account for return address ) + // NOTICE HERE!!!! I sub a wordSize here + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; + //+ wordSize; + + if (!r_2->is_valid()) { + __ ld(AT, saved_sp, ld_off); + __ sd(AT, SP, st_off); + } else { + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + // st_off is LSW (i.e. reg.first()) + + // [./org/eclipse/swt/graphics/GC.java] + // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, + // int destX, int destY, int destWidth, int destHeight, + // boolean simple, + // int imgWidth, int imgHeight, + // long maskPixmap, <-- Pass T_LONG in stack + // int maskType); + // Before this modification, Eclipse displays icons with solid black background. + // + __ ld(AT, saved_sp, ld_off); + if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) + __ ld(AT, saved_sp, ld_off - 8); + __ sd(AT, SP, st_off); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + if (r_2->is_valid()) { + // Remember r_1 is low address (and LSB on mips) + // So r_2 gets loaded from high address regardless of the platform + assert(r_2->as_Register() == r_1->as_Register(), ""); + __ ld(r, saved_sp, ld_off); + + // + // For T_LONG type, the real layout is as below: + // + // (high) + // | | + // ----------- + // | 8 bytes | + // | (void) | + // ----------- + // | 8 bytes | + // | (long) | + // ----------- + // | | + // (low) + // + // We should load the low-8 bytes. + // + if (sig_bt[i] == T_LONG) + __ ld(r, saved_sp, ld_off - 8); + } else { + __ lw(r, saved_sp, ld_off); + } + } else if (r_1->is_FloatRegister()) { // Float Register + assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); + + FloatRegister fr = r_1->as_FloatRegister(); + if (sig_bt[i] == T_FLOAT) + __ lwc1(fr, saved_sp, ld_off); + else { + __ ldc1(fr, saved_sp, ld_off); + __ ldc1(fr, saved_sp, ld_off - 8); + } + } + } + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. + __ get_thread(T8); + __ sd(Rmethod, T8, in_bytes(JavaThread::callee_target_offset())); + + // move methodOop to V0 in case we end up in an c2i adapter. + // the c2i adapters expect methodOop in V0 (c2) because c2's + // resolve stubs return the result (the method) in V0. + // I'd love to fix this. 
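+  // Register choreography: the interpreter SP was staged through T9 into
+  // saved_sp (V0) before T9 was repurposed for the jump target, and V0 itself
+  // is only clobbered below, after the last load from saved_sp.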
+ __ move(V0, Rmethod); + __ jr(T9); + __ delayed()->nop(); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry = __ pc(); + + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know G5 holds the methodOop. The + // args start out packed in the compiled layout. They need to be unpacked + // into the interpreter layout. This will almost always require some stack + // space. We grow the current (compiled) stack, then repack the args. We + // finally end in a jump to the generic interpreter entry point. On exit + // from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relys solely on SP and not FP, get sick). + + address c2i_unverified_entry = __ pc(); + Label skip_fixup; + { + Register holder = T1; + Register receiver = T0; + Register temp = T8; + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + Label missed; + + __ verify_oop(holder); + //add for compressedoops + __ load_klass(temp, receiver); + __ verify_oop(temp); + + __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); + __ bne(AT, temp, missed); + __ delayed()->nop(); + // Method might have been compiled since the call site was patched to + // interpreted if that is the case treat it as a miss so we can get + // the call site corrected. + __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); + __ beq(AT, R0, skip_fixup); + __ delayed()->nop(); + __ bind(missed); + + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ delayed()->nop(); + } + + address c2i_entry = __ pc(); + + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on MIPS"); + // Return the number of VMReg stack_slots needed for the args. + // This value does not include an abi space (like register window + // save area). + + // We return the amount of VMReg stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. Since we always + // have space for storing at least 6 registers to memory we start with that. + // See int_stk_helper for a further discussion. + // We return the amount of VMRegImpl stack slots we need to reserve for all + // the arguments NOT counting out_preserve_stack_slots. 
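+  // Unlike java_calling_convention above, the native convention has no T0
+  // slot: integer arguments start at A0 (the first one being the JNIEnv*), so
+  // every incoming Java register argument shifts down by at least one
+  // position; see the forkAndExec example below.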
+ static const Register INT_ArgReg[Argument::n_register_parameters] = { + A0, A1, A2, A3, A4, A5, A6, A7 + }; + static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { + F12, F13, F14, F15, F16, F17, F18, F19 + }; + uint args = 0; + uint stk_args = 0; // inc by 2 each time + +// Example: +// n java.lang.UNIXProcess::forkAndExec +// private native int forkAndExec(byte[] prog, +// byte[] argBlock, int argc, +// byte[] envBlock, int envc, +// byte[] dir, +// boolean redirectErrorStream, +// FileDescriptor stdin_fd, +// FileDescriptor stdout_fd, +// FileDescriptor stderr_fd) +// JNIEXPORT jint JNICALL +// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, +// jobject process, +// jbyteArray prog, +// jbyteArray argBlock, jint argc, +// jbyteArray envBlock, jint envc, +// jbyteArray dir, +// jboolean redirectErrorStream, +// jobject stdin_fd, +// jobject stdout_fd, +// jobject stderr_fd) +// +// ::c_calling_convention +// 0: // env <-- a0 +// 1: L // klass/obj <-- t0 => a1 +// 2: [ // prog[] <-- a0 => a2 +// 3: [ // argBlock[] <-- a1 => a3 +// 4: I // argc <-- a2 => a4 +// 5: [ // envBlock[] <-- a3 => a5 +// 6: I // envc <-- a4 => a5 +// 7: [ // dir[] <-- a5 => a7 +// 8: Z // redirectErrorStream <-- a6 => sp[0] +// 9: L // stdin fp[16] => sp[8] +// 10: L // stdout fp[24] => sp[16] +// 11: L // stderr fp[32] => sp[24] +// + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (args < Argument::n_register_parameters) { + regs[i].set1(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (args < Argument::n_register_parameters) { + regs[i].set2(INT_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_FLOAT: + if (args < Argument::n_float_register_parameters) { + regs[i].set1(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (args < Argument::n_float_register_parameters) { + regs[i].set2(FP_ArgReg[args++]->as_VMReg()); + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return round_to(stk_args, 2); +} + +// --------------------------------------------------------------------------- +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ swc1(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ sdc1(FSF, FP, -wordSize ); + break; + case T_VOID: break; + case T_LONG: + __ sd(V0, FP, -wordSize); + break; + case T_OBJECT: + case T_ARRAY: + __ sd(V0, FP, -wordSize); + break; + default: { + __ sw(V0, FP, -wordSize); + } + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg 
and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_FLOAT: + __ lwc1(FSF, FP, -wordSize); + break; + case T_DOUBLE: + __ ldc1(FSF, FP, -wordSize ); + break; + case T_LONG: + __ ld(V0, FP, -wordSize); + break; + case T_VOID: break; + case T_OBJECT: + case T_ARRAY: + __ ld(V0, FP, -wordSize); + break; + default: { + __ lw(V0, FP, -wordSize); + } + } +} + +static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + __ push(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ push(args[i].first()->as_FloatRegister()); + } + } +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { + if (args[i].first()->is_Register()) { + __ pop(args[i].first()->as_Register()); + } else if (args[i].first()->is_FloatRegister()) { + __ pop(args[i].first()->as_FloatRegister()); + } + } +} + +// A simple move of integer like type +static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ lw(AT, FP, reg2offset_in(src.first())); + __ sd(AT, SP, reg2offset_out(dst.first())); + } else { + // stack to reg + __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + if (dst.first() != src.first()){ + __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() + } + } +} + +// An oop arg. Must pass a handle not the oop itself +static void object_move(MacroAssembler* masm, + OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset) { + + // must pass a handle. First figure out the location we use as a handle + + //FIXME, for mips, dst can be register + if (src.first()->is_stack()) { + // Oop is already on the stack as an argument + Register rHandle = V0; + Label nil; + __ xorr(rHandle, rHandle, rHandle); + __ ld(AT, FP, reg2offset_in(src.first())); + __ beq(AT, R0, nil); + __ delayed()->nop(); + __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); + __ bind(nil); + if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); + else __ move( (dst.first())->as_Register(), rHandle); + //if dst is register + //FIXME, do mips need out preserve stack slots? 
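+    // Record the caller's argument slot in the OopMap so GC can find (and
+    // update) the oop while we are in the native wrapper; the handle handed to
+    // the callee is the address of that slot, or NULL if the oop itself was
+    // NULL (handled above).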
+ int offset_in_older_frame = src.first()->reg2stack() + + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + } else { + // Oop is in an a register we must store it to the space we reserve + // on the stack for oop_handles + const Register rOop = src.first()->as_Register(); + assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); + const Register rHandle = V0; + //Important: refer to java_calling_convertion + int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot*VMRegImpl::stack_slot_size; + Label skip; + __ sd( rOop , SP, offset ); + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + __ xorr( rHandle, rHandle, rHandle); + __ beq(rOop, R0, skip); + __ delayed()->nop(); + __ lea(rHandle, Address(SP, offset)); + __ bind(skip); + // Store the handle parameter + if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); + else __ move((dst.first())->as_Register(), rHandle); + //if dst is register + + if (is_receiver) { + *receiver_offset = offset; + } + } +} + +// A float arg may have to do float reg int reg conversion +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); + + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + __ lw(AT, FP, reg2offset_in(src.first())); + __ sw(AT, SP, reg2offset_out(dst.first())); + } + else + __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); + } else { + // reg to stack + if(dst.first()->is_stack()) + __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + else + __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } +} + +// A long move +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The only legal possibility for a long_move VMRegPair is: + // 1: two stack slots (possibly unaligned) + // as neither the java or C calling convention will use registers + // for longs. + + if (src.first()->is_stack()) { + assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); + if( dst.first()->is_stack()){ + __ ld(AT, FP, reg2offset_in(src.first())); + __ sd(AT, SP, reg2offset_out(dst.first())); + } else { + __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); + } + } else { + if( dst.first()->is_stack()){ + __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); + } else { + __ move( (dst.first())->as_Register() , (src.first())->as_Register()); + } + } +} + +// A double move +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + + // The only legal possibilities for a double_move VMRegPair are: + // The painful thing here is that like long_move a VMRegPair might be + + // Because of the calling convention we know that src is either + // 1: a single physical register (xmm registers only) + // 2: two stack slots (possibly unaligned) + // dst can only be a pair of stack slots. 
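+  // (The "xmm registers only" wording above is inherited from x86; on this
+  // port the register case is a FloatRegister. The four cases handled below
+  // are stack to stack via AT, stack to FPR, FPR to stack, and FPR to FPR.)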
+ + + if (src.first()->is_stack()) { + // source is all stack + if( dst.first()->is_stack()){ + __ ld(AT, FP, reg2offset_in(src.first())); + __ sd(AT, SP, reg2offset_out(dst.first())); + } else { + __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); + } + + } else { + // reg to stack + // No worries about stack alignment + if( dst.first()->is_stack()){ + __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); + } + else + __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + + } +} + +static void verify_oop_args(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + Register temp_reg = T9; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (sig_bt[i] == T_OBJECT || + sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + methodHandle method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = S3; // known to be free at this point + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal(err_msg_res("unexpected intrinsic id %d", iid)); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. + fatal("receiver always in a register"); + receiver_reg = SSR; // known to be free at this point + __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. 
The method takes arguments +// in the Java compiled code convention, marshals them to the native +// convention (handlizes oops, etc), transitions to native, makes the call, +// returns to java state (possibly blocking), unhandlizes any result and +// returns. +nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + methodHandle method, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, + BasicType ret_type) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); + int vep_offset = ((intptr_t)__ pc()) - start; + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + gen_special_dispatch(masm, + method, + in_sig_bt, + in_regs); + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period + __ flush(); + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet*)NULL); + } + bool is_critical_native = true; + address native_func = method->critical_native_function(); + if (native_func == NULL) { + native_func = method->native_function(); + is_critical_native = false; + } + assert(native_func != NULL, "must have function"); + + // Native nmethod wrappers never take possesion of the oop arguments. + // So the caller will gc the arguments. The only thing we need an + // oopMap for is if the call is static + // + // An OopMap for lock (and class if static), and one for the VM call itself + OopMapSet *oop_maps = new OopMapSet(); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. 
To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args; + if (!is_critical_native) { + total_c_args += 1; + if (method->is_static()) { + total_c_args++; + } + } else { + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args++; + } + } + } + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + Symbol* atype = ss.as_symbol(CHECK_NULL); + const char* at = atype->as_C_string(); + if (strlen(at) == 2) { + assert(at[0] == '[', "must be"); + switch (at[1]) { + case 'B': in_elem_bt[i] = T_BYTE; break; + case 'C': in_elem_bt[i] = T_CHAR; break; + case 'D': in_elem_bt[i] = T_DOUBLE; break; + case 'F': in_elem_bt[i] = T_FLOAT; break; + case 'I': in_elem_bt[i] = T_INT; break; + case 'J': in_elem_bt[i] = T_LONG; break; + case 'S': in_elem_bt[i] = T_SHORT; break; + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; + default: ShouldNotReachHere(); + } + } + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + in_elem_bt[i] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type(), "must match"); + ss.next(); + } + } + } + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + // + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Compute framesize for the wrapper. We need to handlize all oops in + // registers. We must create space for them here that is disjoint from + // the windowed save area because we have no control over when we might + // flush the window again and overwrite values that gc has since modified. + // (The live window race) + // + // We always just allocate 6 word for storing down these object. This allow + // us to simply record the base and use the Ireg number to decide which + // slot to use. (Note that the reg number is the inbound number not the + // outbound number). + // We must shuffle args to match the native convention, and include var-args space. + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. 
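+    // Count single- and double-slot register arguments first; the save area
+    // is sized from these counts below (T_LONG, T_DOUBLE and T_ARRAY take two
+    // slots each, narrower values take one).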
+ int double_slots = 0; + int single_slots = 0; + for ( int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: single_slots++; break; + case T_ARRAY: + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else if (in_regs[i].first()->is_FloatRegister()) { + switch (in_sig_bt[i]) { + case T_FLOAT: single_slots++; break; + case T_DOUBLE: double_slots++; break; + default: ShouldNotReachHere(); + } + } + } + total_save_slots = double_slots * 2 + single_slots; + // align the save area + if (double_slots != 0) { + stack_slots = round_to(stack_slots, 2); + } + } + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + // Now a place to save return value or as a temporary for any gpr -> fpr moves + // + 2 for return address (which we own) and saved fp + stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | 2 slots for moves | + // |---------------------| + // | lock box (if sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | vararg area | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, StackAlignmentInSlots); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + intptr_t start = (intptr_t)__ pc(); + + + + // First thing make an ic check to see if we should even be here + address ic_miss = SharedRuntime::get_ic_miss_stub(); + + // We are free to use all registers as temps without saving them and + // restoring them except fp. fp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. + + //refer to register_mips.hpp:IC_Klass + const Register ic_reg = T1; + const Register receiver = T0; + + Label hit; + Label exception_pending; + + __ verify_oop(receiver); + //add for compressedoops + __ load_klass(T9, receiver); + __ beq(T9, ic_reg, hit); + __ delayed()->nop(); + __ jmp(ic_miss, relocInfo::runtime_call_type); + __ delayed()->nop(); + __ bind(hit); + + int vep_offset = ((intptr_t)__ pc()) - start; + + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + + // Generate stack overflow check + if (UseStackBanging) { + __ bang_stack_with_offset(StackShadowPages*os::vm_page_size()); + } + + // Generate a new frame for the wrapper. + // do mips need this ? 
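+  // SP is re-aligned to StackAlignmentInBytes before enter(); enter() pushes
+  // RA and FP, and the remaining stack_size - 2*wordSize bytes are claimed
+  // right after it, giving the frame size computed above.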
+#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ enter(); + // -2 because return address is already present and so is saved fp + __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + + // Calculate the difference between sp and fp. We need to know it + // after the native call because on windows Java Natives will pop + // the arguments and it is painful to do sp relative addressing + // in a platform independent way. So after the call we switch to + // fp relative addressing. + //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change + //the SP + int fp_adjustment = stack_size - 2*wordSize; + +#ifdef COMPILER2 + // C2 may leave the stack dirty if not in SSE2+ mode + __ empty_FPU_stack(); +#endif + + // Compute the fp offset for any slots used after the jni call + + int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; + // We use TREG as a thread pointer because it is callee save and + // if we load it once it is usable thru the entire wrapper + const Register thread = TREG; + + // We use S4 as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + const Register oop_handle_reg = S4; + if (is_critical_native) { + Unimplemented(); + // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, + // oop_handle_offset, oop_maps, in_regs, in_sig_bt); + } + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmpi, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + // + // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* + // and, if static, the class mirror instead of a receiver. This pretty much + // guarantees that register layout will not match (and mips doesn't use reg + // parms though amd does). Since the native abi doesn't use register args + // and the java conventions does we don't have to worry about collisions. + // All of our moved are reg->stack or stack->stack. + // We ignore the extra arguments during the shuffle and handle them at the + // last moment. The shuffle is described by the two calling convention + // vectors we have in our possession. We simply walk the java vector to + // get the source locations and the c vector to get the destinations. + + int c_arg = method->is_static() ? 2 : 1 ; + + // Record sp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. 
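+  // Doubling matters because object_move() records incoming stack oops at
+  // offset_in_older_frame + framesize_in_slots, which can exceed the slot
+  // count of this frame alone.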
+ // + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + + // Mark location of fp (someday) + // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + // This may iterate in two different directions depending on the + // kind of native it is. The reason is that for regular JNI natives + // the incoming and outgoing registers are offset upwards and for + // critical natives they are offset down. + GrowableArray arg_order(2 * total_in_args); + VMRegPair tmp_vmreg; + tmp_vmreg.set2(T8->as_VMReg()); + + if (!is_critical_native) { + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); + } + } else { + // Compute a valid move order, using tmp_vmreg to break any cycles + Unimplemented(); + // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); + } + + int temploc = -1; + for (int ai = 0; ai < arg_order.length(); ai += 2) { + int i = arg_order.at(ai); + int c_arg = arg_order.at(ai + 1); + __ block_comment(err_msg("move %d -> %d", i, c_arg)); + if (c_arg == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // This arg needs to be moved to a temporary + __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); + in_regs[i] = tmp_vmreg; + temploc = i; + continue; + } else if (i == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // Read from the temporary location + assert(temploc != -1, "must be valid"); + i = temploc; + temploc = -1; + } +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); + } else if (in_regs[i].first()->is_FloatRegister()) { + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); + } + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif /* ASSERT */ + switch (in_sig_bt[i]) { + case T_ARRAY: + if (is_critical_native) { + Unimplemented(); + // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); + c_arg++; +#ifdef ASSERT + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif + break; + } + case T_OBJECT: + assert(!is_critical_native, "no oop arguments"); + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], + ((i == 0) && (!is_static)), + &receiver_offset); + break; + case T_VOID: + break; + + case T_FLOAT: + float_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_DOUBLE: + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + 
double_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_LONG : + long_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + simple_move32(masm, in_regs[i], out_regs[c_arg]); + } + } + + // point c_arg at the first arg that is already loaded in case we + // need to spill before we call out + c_arg = total_c_args - total_in_args; + // Pre-load a static method's oop. Used both by locking code and + // the normal JNI call code. + + __ move(oop_handle_reg, A1); + + if (method->is_static() && !is_critical_native) { + + // load opp into a register + int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( + (method->method_holder())->java_mirror())); + + + RelocationHolder rspec = oop_Relocation::spec(oop_index); + __ relocate(rspec); + __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); + // Now handlize the static class mirror it's known not-null. + __ sd( oop_handle_reg, SP, klass_offset); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(oop_handle_reg, Address(SP, klass_offset)); + // store the klass handle as second argument + __ move(A1, oop_handle_reg); + // and protect the arg if we must spill + c_arg--; + } + + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + // We use the same pc/oopMap repeatedly when we call out + + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + + __ set_last_Java_frame(SP, noreg, NULL); + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)the_pc ; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + + // We have all of the arguments setup at this point. We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? 
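+  // DTrace method-entry probe: the argument registers are spilled with
+  // save_args()/restore_args() around the call_VM_leaf below precisely
+  // because of the constraint described above.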
+ { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + save_args(masm, total_c_args, c_arg, out_regs); + int metadata_index = __ oop_recorder()->find_index(method()); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_set48(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + thread, AT); + + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // These are register definitions we need for locking/unlocking + const Register swap_reg = T8; // Must use T8 for cmpxchg instruction + const Register obj_reg = T9; // Will contain the oop + //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) + const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) + + + + Label slow_path_lock; + Label lock_done; + + // Lock a synchronized method + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ move(oop_handle_reg, A1); + + // Get address of the box + __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); + + // Load the oop from the handle + __ ld(obj_reg, oop_handle_reg, 0); + + if (UseBiasedLocking) { + // Note that oop_handle_reg is trashed during this call + __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); + } + + // Load immediate 1 into swap_reg %T8 + __ move(swap_reg, 1); + + __ ld(AT, obj_reg, 0); + __ orr(swap_reg, swap_reg, AT); + + __ sd( swap_reg, lock_reg, mark_word_offset); + __ cmpxchg(lock_reg, Address(obj_reg, 0), swap_reg); + __ bne(AT, R0, lock_done); + __ delayed()->nop(); + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 3) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (3 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 2 bits clear. + // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg + + __ dsubu(swap_reg, swap_reg, SP); + __ move(AT, 3 - os::vm_page_size()); + __ andr(swap_reg , swap_reg, AT); + // Save the test result, for recursive case, the result is zero + __ sd(swap_reg, lock_reg, mark_word_offset); + __ bne(swap_reg, R0, slow_path_lock); + __ delayed()->nop(); + // Slow path will re-enter here + __ bind(lock_done); + + if (UseBiasedLocking) { + // Re-fetch oop_handle_reg as we trashed it above + __ move(A1, oop_handle_reg); + } + } + + + // Finally just about ready to make the JNI call + + + // get JNIEnv* which is first argument to native + if (!is_critical_native) { + __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); + } + + // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) + // Load the second arguments into A1 + //__ ld(A1, SP , wordSize ); // klass + + // Now set thread in native + __ addiu(AT, R0, _thread_in_native); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + // do the call + __ call(native_func, relocInfo::runtime_call_type); + __ delayed()->nop(); + // WARNING - on Windows Java Natives use pascal calling convention and pop the + // arguments off of the stack. 
We could just re-adjust the stack pointer here + // and continue to do SP relative addressing but we instead switch to FP + // relative addressing. + + // Unpack native results. + switch (ret_type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ andi(V0, V0, 0xFFFF); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : // nothing to do break; + case T_DOUBLE : + case T_FLOAT : + // Result is in st0 we'll save as needed + break; + case T_ARRAY: // Really a handle + case T_OBJECT: // Really a handle + break; // can't de-handlize until after safepoint check + case T_VOID: break; + case T_LONG: break; + default : ShouldNotReachHere(); + } + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. + __ addiu(AT, R0, _thread_in_native_trans); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ sync(); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. + __ serialize_memory(thread, A0); + } + } + + Label after_transition; + + // check for safepoint operation in progress and/or pending suspend requests + { + Label Continue; + __ li(AT, SafepointSynchronize::address_of_state()); + __ lw(A0, AT, 0); + __ addiu(AT, A0, -SafepointSynchronize::_not_synchronized); + Label L; + __ bne(AT, R0, L); + __ delayed()->nop(); + __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ delayed()->nop(); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // + save_native_result(masm, ret_type, stack_slots); + __ move(A0, thread); + __ addiu(SP, SP, -wordSize); + __ push(S2); + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + if (!is_critical_native) { + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); + __ delayed()->nop(); + } else { + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); + __ delayed()->nop(); + } + __ move(SP, S2); // use S2 as a sender SP holder + __ pop(S2); + __ addiu(SP, SP, wordSize); + //add for compressedoops + __ reinit_heapbase(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + + if (is_critical_native) { + // The call above performed the transition to thread_in_Java so + // skip the transition logic below. 
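+      // beq(R0, R0, ...) is the unconditional-branch idiom used in this file.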
+ __ beq(R0, R0, after_transition); + __ delayed()->nop(); + } + + __ bind(Continue); + } + + // change thread state + __ addiu(AT, R0, _thread_in_Java); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); + __ bind(after_transition); + Label reguard; + Label reguard_done; + __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ addiu(AT, AT, -JavaThread::stack_guard_yellow_disabled); + __ beq(AT, R0, reguard); + __ delayed()->nop(); + // slow path reguard re-enters here + __ bind(reguard_done); + + // Handle possible exception (will unlock if necessary) + + // native result if any is live + + // Unlock + Label slow_path_unlock; + Label unlock_done; + if (method->is_synchronized()) { + + Label done; + + // Get locked oop from the handle we passed to jni + __ ld( obj_reg, oop_handle_reg, 0); + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, T8, done); + + } + + // Simple recursive lock? + + __ ld(AT, FP, lock_slot_fp_offset); + __ beq(AT, R0, done); + __ delayed()->nop(); + // Must save FSF if if it is live now because cmpxchg must use it + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + save_native_result(masm, ret_type, stack_slots); + } + + // get old displaced header + __ ld (T8, FP, lock_slot_fp_offset); + // get address of the stack lock + __ addiu(c_rarg0, FP, lock_slot_fp_offset); + // Atomic swap old header if oop still contains the stack lock + __ cmpxchg(T8, Address(obj_reg, 0), c_rarg0); + + __ beq(AT, R0, slow_path_unlock); + __ delayed()->nop(); + // slow path re-enters here + __ bind(unlock_done); + if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { + restore_native_result(masm, ret_type, stack_slots); + } + + __ bind(done); + + } + { + SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); + // Tell dtrace about this method exit + save_native_result(masm, ret_type, stack_slots); + int metadata_index = __ oop_recorder()->find_index( (method())); + RelocationHolder rspec = metadata_Relocation::spec(metadata_index); + __ relocate(rspec); + __ patchable_set48(AT, (long)(method())); + + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + thread, AT); + restore_native_result(masm, ret_type, stack_slots); + } + + // We can finally stop using that last_Java_frame we setup ages ago + + __ reset_last_Java_frame(false); + + // Unpack oop result, e.g. JNIHandles::resolve value. + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + __ resolve_jobject(V0, thread, T9); + } + + if (!is_critical_native) { + // reset handle block + __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); + __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); + } + + if (!is_critical_native) { + // Any exception pending? + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, exception_pending); + __ delayed()->nop(); + } + // no exception, we're almost done + + // check that only result value is on FPU stack + __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); + + // Return +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); + __ leave(); + + __ jr(RA); + __ delayed()->nop(); + // Unexpected paths are out of line and go here + // Slow path locking & unlocking + if (method->is_synchronized()) { + + // BEGIN Slow path lock + __ bind(slow_path_lock); + + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + + // has last_Java_frame setup. No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + __ move(A0, obj_reg); + __ move(A1, lock_reg); + __ move(A2, thread); + __ addiu(SP, SP, - 3*wordSize); + + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ move(SP, S2); + __ addiu(SP, SP, 3*wordSize); + + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ b(lock_done); + __ delayed()->nop(); + // END Slow path lock + + // BEGIN Slow path unlock + __ bind(slow_path_unlock); + + // Slow path unlock + + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + save_native_result(masm, ret_type, stack_slots); + } + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ push(AT); + __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); + + __ move(AT, -(StackAlignmentInBytes)); + __ move(S2, SP); // use S2 as a sender SP holder + __ andr(SP, SP, AT); // align stack as required by ABI + + // should be a peal + // +wordSize because of the push above + __ addiu(A1, FP, lock_slot_fp_offset); + + __ move(A0, obj_reg); + __ addiu(SP,SP, -2*wordSize); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), + relocInfo::runtime_call_type); + __ delayed()->nop(); + __ addiu(SP, SP, 2*wordSize); + __ move(SP, S2); + //add for compressedoops + __ reinit_heapbase(); +#ifdef ASSERT + { + Label L; + __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif /* ASSERT */ + + __ pop(AT); + __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); + if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { + restore_native_result(masm, ret_type, stack_slots); + } + __ b(unlock_done); + __ delayed()->nop(); + // END Slow path unlock + + } + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), + relocInfo::runtime_call_type); + __ delayed()->nop(); + //add for compressedoops + __ reinit_heapbase(); + restore_native_result(masm, ret_type, stack_slots); + __ b(reguard_done); + __ delayed()->nop(); + + // BEGIN EXCEPTION PROCESSING + if (!is_critical_native) { + // Forward the exception + __ bind(exception_pending); + + // remove possible return value from FPU register stack + __ empty_FPU_stack(); + + // 
pop our frame + //forward_exception_entry need return address on stack + __ move(SP, FP); + __ pop(FP); + + // and forward the exception + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + } + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + + if (is_critical_native) { + nm->set_lazy_critical_native(true); + } + + return nm; + +} + +#ifdef HAVE_DTRACE_H +// --------------------------------------------------------------------------- +// Generate a dtrace nmethod for a given signature. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// abi and then leaves nops at the position you would expect to call a native +// function. When the probe is enabled the nops are replaced with a trap +// instruction that dtrace inserts and the trace will cause a notification +// to dtrace. +// +// The probes are only able to take primitive types and java/lang/String as +// arguments. No other java types are allowed. Strings are converted to utf8 +// strings so that from dtrace point of view java strings are converted to C +// strings. There is an arbitrary fixed limit on the total space that a method +// can use for converting the strings. (256 chars per string in the signature). +// So any java string larger then this is truncated. + +static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; +static bool offsets_initialized = false; + +static VMRegPair reg64_to_VMRegPair(Register r) { + VMRegPair ret; + if (wordSize == 8) { + ret.set2(r->as_VMReg()); + } else { + ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); + } + return ret; +} + + +nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, + methodHandle method) { + + + // generate_dtrace_nmethod is guarded by a mutex so we are sure to + // be single threaded in this method. + assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); + + // Fill in the signature array, for the calling-convention call. + int total_args_passed = method->size_of_parameters(); + + BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); + VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); + + // The signature we are going to use for the trap that dtrace will see + // java/lang/String is converted. We drop "this" and any other object + // is converted to NULL. (A one-slot java/lang/Long object reference + // is converted to a two-slot long, which is why we double the allocation). 
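+  // For example, a static method taking (String, long, float) would yield an
+  // out signature of { T_ADDRESS, T_LONG, T_VOID, T_INT }: the String becomes
+  // a C (utf8) string pointer, the long keeps its T_VOID filler slot, and the
+  // float is passed as an int.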
+ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); + + int i=0; + int total_strings = 0; + int first_arg_to_pass = 0; + int total_c_args = 0; + + // Skip the receiver as dtrace doesn't want to see it + if( !method->is_static() ) { + in_sig_bt[i++] = T_OBJECT; + first_arg_to_pass = 1; + } + + SignatureStream ss(method->signature()); + for ( ; !ss.at_return_type(); ss.next()) { + BasicType bt = ss.type(); + in_sig_bt[i++] = bt; // Collect remaining bits of signature + out_sig_bt[total_c_args++] = bt; + if( bt == T_OBJECT) { + symbolOop s = ss.as_symbol_or_null(); + if (s == vmSymbols::java_lang_String()) { + total_strings++; + out_sig_bt[total_c_args-1] = T_ADDRESS; + } else if (s == vmSymbols::java_lang_Boolean() || + s == vmSymbols::java_lang_Byte()) { + out_sig_bt[total_c_args-1] = T_BYTE; + } else if (s == vmSymbols::java_lang_Character() || + s == vmSymbols::java_lang_Short()) { + out_sig_bt[total_c_args-1] = T_SHORT; + } else if (s == vmSymbols::java_lang_Integer() || + s == vmSymbols::java_lang_Float()) { + out_sig_bt[total_c_args-1] = T_INT; + } else if (s == vmSymbols::java_lang_Long() || + s == vmSymbols::java_lang_Double()) { + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } + } else if ( bt == T_LONG || bt == T_DOUBLE ) { + in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots + // We convert double to long + out_sig_bt[total_c_args-1] = T_LONG; + out_sig_bt[total_c_args++] = T_VOID; + } else if ( bt == T_FLOAT) { + // We convert float to int + out_sig_bt[total_c_args-1] = T_INT; + } + } + + assert(i==total_args_passed, "validly parsed signature"); + + // Now get the compiled-Java layout as input arguments + int comp_args_on_stack; + comp_args_on_stack = SharedRuntime::java_calling_convention( + in_sig_bt, in_regs, total_args_passed, false); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the a native (non-jni) function would expect them. To figure out + // where they go we convert the java signature to a C signature and remove + // T_VOID for any long/double we might have received. + + + // Now figure out where the args must be stored and how much stack space + // they require (neglecting out_preserve_stack_slots but space for storing + // the 1st six register arguments). It's weird see int_stk_helper. + + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Plus a temp for possible converion of float/double/long register args + + int conversion_temp = stack_slots; + stack_slots += 2; + + + // Now space for the string(s) we must convert + + int string_locs = stack_slots; + stack_slots += total_strings * + (max_dtrace_string_size / VMRegImpl::stack_slot_size); + + // Ok The space we have allocated will look like: + // + // + // FP-> | | + // |---------------------| + // | string[n] | + // |---------------------| <- string_locs[n] + // | string[n-1] | + // |---------------------| <- string_locs[n-1] + // | ... | + // | ... 
| + // |---------------------| <- string_locs[1] + // | string[0] | + // |---------------------| <- string_locs[0] + // | temp | + // |---------------------| <- conversion_temp + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + intptr_t start = (intptr_t)__ pc(); + + // First thing make an ic check to see if we should even be here + + { + Label L; + const Register temp_reg = G3_scratch; + Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); + __ verify_oop(O0); + __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); + __ cmp(temp_reg, G5_inline_cache_reg); + __ brx(Assembler::equal, true, Assembler::pt, L); + __ delayed()->nop(); + + __ jump_to(ic_miss, 0); + __ delayed()->nop(); + __ align(CodeEntryAlignment); + __ bind(L); + } + + int vep_offset = ((intptr_t)__ pc()) - start; + + // Make enough room for patch_verified_entry + __ nop(); + __ nop(); + + // Generate stack overflow check before creating frame + __ generate_stack_overflow_check(stack_size); + + // Generate a new frame for the wrapper. + __ save(SP, -stack_size, SP); + + // Frame is now completed as far a size and linkage. + + int frame_complete = ((intptr_t)__ pc()) - start; + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif /* ASSERT */ + + VMRegPair zero; + const Register g0 = G0; // without this we get a compiler warning (why??) 
+ zero.set2(g0->as_VMReg()); + + int c_arg, j_arg; + + Register conversion_off = noreg; + + for (j_arg = first_arg_to_pass, c_arg = 0 ; + j_arg < total_args_passed ; j_arg++, c_arg++ ) { + + VMRegPair src = in_regs[j_arg]; + VMRegPair dst = out_regs[c_arg]; + +#ifdef ASSERT + if (src.first()->is_Register()) { + assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); + } else if (src.first()->is_FloatRegister()) { + assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)], "ack!"); + } + if (dst.first()->is_Register()) { + reg_destroyed[dst.first()->as_Register()->encoding()] = true; + } else if (dst.first()->is_FloatRegister()) { + freg_destroyed[dst.first()->as_FloatRegister()->encoding( + FloatRegisterImpl::S)] = true; + } +#endif /* ASSERT */ + + switch (in_sig_bt[j_arg]) { + case T_ARRAY: + case T_OBJECT: + { + if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || + out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { + // need to unbox a one-slot value + Register in_reg = L0; + Register tmp = L2; + if ( src.first()->is_reg() ) { + in_reg = src.first()->as_Register(); + } else { + assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); + } + // If the final destination is an acceptable register + if ( dst.first()->is_reg() ) { + if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { + tmp = dst.first()->as_Register(); + } + } + + Label skipUnbox; + if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { + __ mov(G0, tmp->successor()); + } + __ br_null(in_reg, true, Assembler::pn, skipUnbox); + __ delayed()->mov(G0, tmp); + + BasicType bt = out_sig_bt[c_arg]; + int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); + switch (bt) { + case T_BYTE: + __ ldub(in_reg, box_offset, tmp); break; + case T_SHORT: + __ lduh(in_reg, box_offset, tmp); break; + case T_INT: + __ ld(in_reg, box_offset, tmp); break; + case T_LONG: + __ ld_long(in_reg, box_offset, tmp); break; + default: ShouldNotReachHere(); + } + + __ bind(skipUnbox); + // If tmp wasn't final destination copy to final destination + if (tmp == L2) { + VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); + if (out_sig_bt[c_arg] == T_LONG) { + long_move(masm, tmp_as_VM, dst); + } else { + move32_64(masm, tmp_as_VM, out_regs[c_arg]); + } + } + if (out_sig_bt[c_arg] == T_LONG) { + assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); + ++c_arg; // move over the T_VOID to keep the loop indices in sync + } + } else if (out_sig_bt[c_arg] == T_ADDRESS) { + Register s = + src.first()->is_reg() ? src.first()->as_Register() : L2; + Register d = + dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // We store the oop now so that the conversion pass can reach + // while in the inner frame. This will be the only store if + // the oop is NULL. 
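+          // s/d fall back to the temporary L2 when the source or destination
+          // lives on the stack rather than in a register.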
+ if (s != L2) { + // src is register + if (d != L2) { + // dst is register + __ mov(s, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } else { + // src not a register + assert(Assembler::is_simm13(reg2offset(src.first()) + + STACK_BIAS), "must be"); + __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); + if (d == L2) { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } else if (out_sig_bt[c_arg] != T_VOID) { + // Convert the arg to NULL + if (dst.first()->is_reg()) { + __ mov(G0, dst.first()->as_Register()); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + + STACK_BIAS), "must be"); + __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); + } + } + } + break; + case T_VOID: + break; + + case T_FLOAT: + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + move32_64(masm, src, dst); + } else { + if (dst.first()->is_reg()) { + // freg -> reg + int off = + STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + Register d = dst.first()->as_Register(); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + __ ld(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld(SP, conversion_off , d); + } + } else { + // freg -> mem + int off = STACK_BIAS + reg2offset(dst.first()); + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, off); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), + SP, conversion_off); + } + } + } + break; + + case T_DOUBLE: + assert( j_arg + 1 < total_args_passed && + in_sig_bt[j_arg + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + if (src.first()->is_stack()) { + // Stack to stack/reg is simple + long_move(masm, src, dst); + } else { + Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; + + // Destination could be an odd reg on 32bit in which case + // we can't load direct to the destination. 
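+          // (Presumably because the 64-bit ld_long below needs an even
+          // register pair in 32-bit mode; L2 is used as a detour otherwise.)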
+ + if (!d->is_even() && wordSize == 4) { + d = L2; + } + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, off); + __ ld_long(SP, off, d); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), + SP, conversion_off); + __ ld_long(SP, conversion_off, d); + } + if (d == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } + break; + + case T_LONG : + // 32bit can't do a split move of something like g1 -> O0, O1 + // so use a memory temp + if (src.is_single_phys_reg() && wordSize == 4) { + Register tmp = L2; + if (dst.first()->is_reg() && + (wordSize == 8 || dst.first()->as_Register()->is_even())) { + tmp = dst.first()->as_Register(); + } + + int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; + if (Assembler::is_simm13(off)) { + __ stx(src.first()->as_Register(), SP, off); + __ ld_long(SP, off, tmp); + } else { + if (conversion_off == noreg) { + __ set(off, L6); + conversion_off = L6; + } + __ stx(src.first()->as_Register(), SP, conversion_off); + __ ld_long(SP, conversion_off, tmp); + } + + if (tmp == L2) { + long_move(masm, reg64_to_VMRegPair(L2), dst); + } + } else { + long_move(masm, src, dst); + } + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + + default: + move32_64(masm, src, dst); + } + } + + + // If we have any strings we must store any register based arg to the stack + // This includes any still live xmm registers too. + + if (total_strings > 0 ) { + + // protect all the arg registers + __ save_frame(0); + __ mov(G2_thread, L7_thread_cache); + const Register L2_string_off = L2; + + // Get first string offset + __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); + + for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { + if (out_sig_bt[c_arg] == T_ADDRESS) { + + VMRegPair dst = out_regs[c_arg]; + const Register d = dst.first()->is_reg() ? + dst.first()->as_Register()->after_save() : noreg; + + // It's a string the oop and it was already copied to the out arg + // position + if (d != noreg) { + __ mov(d, O0); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); + } + Label skip; + + __ br_null(O0, false, Assembler::pn, skip); + __ delayed()->addu(FP, L2_string_off, O1); + + if (d != noreg) { + __ mov(O1, d); + } else { + assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), + "must be"); + __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); + } + + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), + relocInfo::runtime_call_type); + __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); + + __ bind(skip); + + } + + } + __ mov(L7_thread_cache, G2_thread); + __ restore(); + + } + + + // Ok now we are done. 
Need to place the nop that dtrace wants in order to + // patch in the trap + + int patch_offset = ((intptr_t)__ pc()) - start; + + __ nop(); + + + // Return + + __ ret(); + __ delayed()->restore(); + + __ flush(); + + nmethod *nm = nmethod::new_dtrace_nmethod( + method, masm->code(), vep_offset, patch_offset, frame_complete, + stack_slots / VMRegImpl::slots_per_word); + return nm; + +} + +#endif // HAVE_DTRACE_H + +// this function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + return (callee_locals - callee_parameters) * Interpreter::stackElementWords; +} + +// "Top of Stack" slots that may be unused by the calling convention but must +// otherwise be preserved. +// On Intel these are not necessary and the value can be zero. +// On Sparc this describes the words reserved for storing a register window +// when an interrupt occurs. +uint SharedRuntime::out_preserve_stack_slots() { + return 0; +} + +//------------------------------generate_deopt_blob---------------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. +void SharedRuntime::generate_deopt_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + //CodeBuffer buffer ("deopt_blob", 4000, 2048); + CodeBuffer buffer ("deopt_blob", 8000, 2048); + MacroAssembler* masm = new MacroAssembler( & buffer); + int frame_size_in_words; + OopMap* map = NULL; + // Account for the extra args we place on the stack + // by the time we call fetch_unroll_info + const int additional_words = 2; // deopt kind, thread + + OopMapSet *oop_maps = new OopMapSet(); + + address start = __ pc(); + Label cont; + // we use S3 for DeOpt reason register + Register reason = S3; + // use S6 for thread register + Register thread = TREG; + // use S7 for fetch_unroll_info returned UnrollBlock + Register unroll = S7; + // Prolog for non exception case! + // Correct the return address we were given. + //FIXME, return address is on the tos or Ra? + __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); + // Save everything in sight. + map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); + // Normal deoptimization + __ move(reason, Deoptimization::Unpack_deopt); + __ b(cont); + __ delayed()->nop(); + + int reexecute_offset = __ pc() - start; + + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); + __ move(reason, Deoptimization::Unpack_reexecute); + __ b(cont); + __ delayed()->nop(); + + int exception_offset = __ pc() - start; + // Prolog for exception case + + // all registers are dead at this entry point, except for V0 and + // V1 which contain the exception oop and exception pc + // respectively. Set them in TLS and fall thru to the + // unpack_with_exception_in_tls entry point. 
+ + __ get_thread(thread); + __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + int exception_in_tls_offset = __ pc() - start; + // new implementation because exception oop is now passed in JavaThread + + // Prolog for exception case + // All registers must be preserved because they might be used by LinearScan + // Exceptiop oop and throwing PC are passed in JavaThread + // tos: stack at point of call to method that threw the exception (i.e. only + // args are on the stack, no return address) + + // Return address will be patched later with the throwing pc. The correct value is not + // available now because loading it from memory would destroy registers. + // Save everything in sight. + // No need to update map as each call to save_live_registers will produce identical oopmap + __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); + (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); + + // Now it is safe to overwrite any register + // store the correct deoptimization type + __ move(reason, Deoptimization::Unpack_exception); + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread + __ get_thread(thread); + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + + +#ifdef ASSERT + // verify that there is really an exception oop in JavaThread + __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); + __ verify_oop(AT); + // verify that there is no pending exception + Label no_pending_exception; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, no_pending_exception); + __ delayed()->nop(); + __ stop("must not have pending exception here"); + __ bind(no_pending_exception); +#endif + __ bind(cont); + // Compiled code leaves the floating point stack dirty, empty it. + __ empty_FPU_stack(); + + + // Call C code. Need thread and this frame, but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + __ move(A0, thread); + __ addiu(SP, SP, -additional_words * wordSize); + + __ set_last_Java_frame(NOREG, NOREG, NULL); + + // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. Call should capture return values. 
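+  // The pc stored into the frame anchor below is precomputed as the current
+  // pc plus NativeMovConstReg::instruction_size plus 28 bytes, which appears
+  // to cover the sd/call/nop sequence, so that it matches the pc at which the
+  // oop map is registered after the call.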
+ + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + __ call((address)Deoptimization::fetch_unroll_info); + //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); + __ delayed()->nop(); + oop_maps->add_gc_map(__ pc() - start, map); + __ addiu(SP, SP, additional_words * wordSize); + __ get_thread(thread); + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + __ move(unroll, V0); + + + // Move the unpack kind to a safe place in the UnrollBlock because + // we are very short of registers + + Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); + __ sw(reason, unpack_kind); + // save the unpack_kind value + // Retrieve the possible live values (return values) + // All callee save registers representing jvm state + // are now in the vframeArray. + + Label noException; + __ move(AT, Deoptimization::Unpack_exception); + __ bne(AT, reason, noException);// Was exception pending? + __ delayed()->nop(); + __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); + __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); + __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); + + __ verify_oop(V0); + + // Overwrite the result registers with the exception results. + __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); + __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); + + __ bind(noException); + + + // Stack is back to only having register save data on the stack. + // Now restore the result registers. Everything else is either dead or captured + // in the vframeArray. + + RegisterSaver::restore_result_registers(masm); + // All of the register save area has been popped of the stack. Only the + // return address remains. + // Pop all the frames we must move/replace. + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). + // + // Note: by leaving the return address of self-frame on the stack + // and using the size of frame 2 to adjust the stack + // when we are done the return to frame 3 will still be on the stack. 
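Before the register assignments below, the stack surgery done by the frame-push loop can be summarized in plain C++. The sketch uses a hypothetical Unroll struct in place of Deoptimization::UnrollBlock and omits the interpreter-frame bookkeeping (the last_sp/sender_sp stores) that the generated code performs; it is an editor's illustration, not code from the patch:

    #include <cstdint>

    // Hypothetical stand-in for Deoptimization::UnrollBlock (field names simplified).
    struct Unroll {
      int             size_of_deoptimized_frame;  // bytes
      int             caller_adjustment;          // bytes of extra locals for the caller
      int             number_of_frames;           // skeletal interpreter frames to push
      const int*      frame_sizes;                // bytes, one per skeletal frame
      const intptr_t* frame_pcs;                  // number_of_frames + 1 return pcs
    };

    // Conceptual equivalent of the loop emitted below (wordSize == 8 on MIPS64).
    inline intptr_t* unroll_frames(intptr_t* sp, intptr_t*& fp, const Unroll& u) {
      const int wordSize = 8;
      sp = (intptr_t*)((char*)sp + u.size_of_deoptimized_frame);  // pop the deoptimized frame
      sp += 1;                                                    // trash its return pc
      sp = (intptr_t*)((char*)sp - u.caller_adjustment);          // make room for extra locals
      for (int i = 0; i < u.number_of_frames; i++) {
        *--sp = u.frame_pcs[i];                                   // push return pc
        *--sp = (intptr_t)fp;                                     // push saved fp
        fp = sp;                                                  // new frame link
        sp = (intptr_t*)((char*)sp - (u.frame_sizes[i] - 2 * wordSize));
      }
      *--sp = u.frame_pcs[u.number_of_frames];                    // Interpreter::deopt_entry pc
      *--sp = (intptr_t)fp;                                       // re-push self-frame
      fp = sp;
      return sp;
    }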
+ + // register for the sender's sp + Register sender_sp = Rsender; + // register for frame pcs + Register pcs = T0; + // register for frame sizes + Register sizes = T1; + // register for frame count + Register count = T3; + + // Pop deoptimized frame + __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ addu(SP, SP, AT); + // sp should be pointing at the return address to the caller (3) + + // Load array of frame pcs into pcs + __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + __ addiu(SP, SP, wordSize); // trash the old pc + // Load array of frame sizes into T6 + __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + + + + // Load count of frams into T3 + __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + // Pick up the initial fp we should save + __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + __ move(sender_sp, SP); + __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ subu(SP, SP, AT); + + // Push interpreter frames in a loop + // + //Loop: + // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld + // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] + // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 + // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 + // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp + // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at + // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp + // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 + // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); + // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- + // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 + // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 + // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 + // + // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split + Label loop; + __ bind(loop); + __ ld(T2, sizes, 0); // Load frame size + __ ld_ptr(AT, pcs, 0); // save return address + __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ subu(SP, SP, T2); // Prolog! 
+ // This value is corrected by layout_activation_impl + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ addiu(count, count, -1); // decrement counter + __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ bne(count, R0, loop); + __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); + // Re-push self-frame + __ push2(AT, FP); + __ move(FP, SP); + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); + __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); + + // Restore frame locals after moving the frame + __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); + __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); + __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local + __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); + + + // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on + // this call, no GC can happen. + __ move(A1, reason); // exec_mode + __ get_thread(thread); + __ move(A0, thread); // thread + __ addiu(SP, SP, (-additional_words) *wordSize); + + // set last_Java_sp, last_Java_fp + __ set_last_Java_frame(NOREG, FP, NULL); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + + __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); + __ delayed()->nop(); + // Revert SP alignment after call since we're going to do some SP relative addressing below + __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // Set an oopmap for the call site + oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); + + __ push(V0); + + __ get_thread(thread); + __ reset_last_Java_frame(true); + + // Collect return values + __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); + __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); + __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local + __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); + //FIXME, + // Clear floating point stack before returning to interpreter + __ empty_FPU_stack(); + //FIXME, we should consider about float and double + // Push a float or double return value if necessary. + __ leave(); + + // Jump to interpreter + __ jr(RA); + __ delayed()->nop(); + + masm->flush(); + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); +} + +#ifdef COMPILER2 + +//------------------------------generate_uncommon_trap_blob-------------------- +// Ought to generate an ideal graph & compile, but here's some SPARC ASM +// instead. 
+void SharedRuntime::generate_uncommon_trap_blob() { + // allocate space for the code + ResourceMark rm; + // setup code generation tools + CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); + MacroAssembler* masm = new MacroAssembler(&buffer); + + enum frame_layout { + fp_off, fp_off2, + return_off, return_off2, + framesize + }; + assert(framesize % 4 == 0, "sp not 16-byte aligned"); + + address start = __ pc(); + + // Push self-frame. + __ daddiu(SP, SP, -framesize * BytesPerInt); + + __ sd(RA, SP, return_off * BytesPerInt); + __ sd(FP, SP, fp_off * BytesPerInt); + + __ daddiu(FP, SP, fp_off * BytesPerInt); + + // Clear the floating point exception stack + __ empty_FPU_stack(); + + Register thread = TREG; + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // set last_Java_sp + __ set_last_Java_frame(NOREG, FP, NULL); + __ relocate(relocInfo::internal_pc_type); + { + long save_pc = (long)__ pc() + 52; + __ patchable_set48(AT, (long)save_pc); + __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); + } + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // capture callee-saved registers as well as return values. + __ move(A0, thread); + // argument already in T0 + __ move(A1, T0); + __ patchable_call((address)Deoptimization::uncommon_trap); + + // Set an oopmap for the call site + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = new OopMap( framesize, 0 ); + + //oop_maps->add_gc_map( __ offset(), true, map); + oop_maps->add_gc_map( __ offset(), map); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ reset_last_Java_frame(false); + + // Load UnrollBlock into S7 + Register unroll = S7; + __ move(unroll, V0); + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: possible-i2c-adapter-frame + // 4: caller of deopting frame (could be compiled/interpreted. If interpreted we will create an + // and c2i here) + + __ daddiu(SP, SP, framesize * BytesPerInt); + + // Pop deoptimized frame + __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); + __ daddu(SP, SP, AT); + + // register for frame pcs + Register pcs = T8; + // register for frame sizes + Register sizes = T9; + // register for frame count + Register count = T3; + // register for the sender's sp + Register sender_sp = T1; + + // sp should be pointing at the return address to the caller (4) + // Load array of frame pcs + __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); + + // Load array of frame sizes + __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); + __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); + + // Pick up the initial fp we should save + __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. 
+ + __ move(sender_sp, SP); + __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ dsubu(SP, SP, AT); + // Push interpreter frames in a loop + Label loop; + __ bind(loop); + __ ld(T2, sizes, 0); // Load frame size + __ ld(AT, pcs, 0); // save return address + __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand + __ push2(AT, FP); + __ move(FP, SP); + __ dsubu(SP, SP, T2); // Prolog! + // This value is corrected by layout_activation_impl + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable + __ move(sender_sp, SP); // pass to next frame + __ daddiu(count, count, -1); // decrement counter + __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) + __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) + __ bne(count, R0, loop); + __ delayed()->nop(); // Bump array pointer (pcs) + + __ ld(RA, pcs, 0); + + // Re-push self-frame + // save old & set new FP + // save final return address + __ enter(); + + // Use FP because the frames look interpreted now + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + address the_pc = __ pc(); + __ set_last_Java_frame(NOREG, FP, the_pc); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); // Fix stack alignment as required by ABI + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + __ move(A0, thread); + __ move(A1, Deoptimization::Unpack_uncommon_trap); + __ patchable_call((address)Deoptimization::unpack_frames); + // Set an oopmap for the call site + oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); + + __ reset_last_Java_frame(true); + + // Pop self-frame. + __ leave(); // Epilog! + + // Jump to interpreter + __ jr(RA); + __ delayed()->nop(); + // ------------- + // make sure all code is generated + masm->flush(); + + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); +} + +#endif // COMPILER2 + +//------------------------------generate_handler_blob------------------- +// +// Generate a special Compile2Runtime blob that saves all registers, and sets +// up an OopMap and calls safepoint code to stop the compiled code for +// a safepoint. +// +// This blob is jumped to (via a breakpoint and the signal handler) from a +// safepoint in compiled code. + +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { + + // Account for thread arg in our frame + const int additional_words = 0; + int frame_size_in_words; + + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // allocate space for the code + // setup code generation tools + CodeBuffer buffer ("handler_blob", 2048, 512); + MacroAssembler* masm = new MacroAssembler( &buffer); + + const Register thread = TREG; + address start = __ pc(); + address call_pc = NULL; + bool cause_return = (pool_type == POLL_AT_RETURN); + bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); + + // If cause_return is true we are at a poll_return and there is + // the return address in RA to the caller on the nmethod + // that is safepoint. 
We can leave this return in RA and + // effectively complete the return and safepoint in the caller. + // Otherwise we load exception pc to RA. + __ push(thread); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + if(!cause_return) { + __ ld_ptr(RA, Address(thread, JavaThread::saved_exception_pc_offset())); + } + + __ pop(thread); + map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work outselvs. + + __ move(A0, thread); + __ set_last_Java_frame(NOREG, NOREG, NULL); + + + // Do the call + __ call(call_ptr); + __ delayed()->nop(); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + oop_maps->add_gc_map(__ offset(), map); + + Label noException; + + // Clear last_Java_sp again + __ reset_last_Java_frame(false); + + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, noException); + __ delayed()->nop(); + + // Exception pending + + RegisterSaver::restore_live_registers(masm, save_vectors); + //forward_exception_entry need return address on the stack + __ push(RA); + __ patchable_jump((address)StubRoutines::forward_exception_entry()); + + // No exception case + __ bind(noException); + // Normal exit, register restoring and exit + RegisterSaver::restore_live_registers(masm, save_vectors); + __ jr(RA); + __ delayed()->nop(); + + masm->flush(); + + // Fill-out other meta info + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); +} + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss +// +// Generate a stub that calls into vm to find out the proper destination +// of a java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + + //CodeBuffer buffer(name, 1000, 512); + CodeBuffer buffer(name, 2000, 2048); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int frame_size_words; + //we put the thread in A0 + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + int start = __ offset(); + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); + + + int frame_complete = __ offset(); + + const Register thread = T8; + __ get_thread(thread); + + __ move(A0, thread); + __ set_last_Java_frame(noreg, FP, NULL); + //align the stack before invoke native + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); + + __ call(destination); + __ delayed()->nop(); + + // Set an oopmap for the call site. 
+ // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. + oop_maps->add_gc_map( __ offset() - start, map); + // V0 contains the address we are going to jump to assuming no exception got installed + __ get_thread(thread); + __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + // clear last_Java_sp + __ reset_last_Java_frame(true); + // check for pending exceptions + Label pending; + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, pending); + __ delayed()->nop(); + // get the returned Method* + //FIXME, do mips need this ? + __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 + __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); + __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); + RegisterSaver::restore_live_registers(masm); + + // We are back the the original state on entry and ready to go the callee method. + __ jr(V0); + __ delayed()->nop(); + // Pending exception after the safepoint + + __ bind(pending); + + RegisterSaver::restore_live_registers(masm); + + // exception pending => remove activation and forward to exception handler + //forward_exception_entry need return address on the stack + __ push(RA); + __ get_thread(thread); + __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); + __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + // + // make sure all code is generated + masm->flush(); + + RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); + return tmp; +} + +extern "C" int SpinPause() {return 0;} + + +//------------------------------Montgomery multiplication------------------------ +// + +// Subtract 0:b from carry:a. Return carry. +static unsigned long +sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { + long borrow = 0, t = 0; + unsigned long tmp0, tmp1; + __asm__ __volatile__ ( + "0: \n" + "ld %[tmp0], 0(%[a]) \n" + "ld %[tmp1], 0(%[b]) \n" + "sltu %[t], %[tmp0], %[borrow] \n" + "dsubu %[tmp0], %[tmp0], %[borrow] \n" + "sltu %[borrow], %[tmp0], %[tmp1] \n" + "or %[borrow], %[borrow], %[t] \n" + "dsubu %[tmp0], %[tmp0], %[tmp1] \n" + "sd %[tmp0], 0(%[a]) \n" + "daddiu %[a], %[a], 8 \n" + "daddiu %[b], %[b], 8 \n" + "daddiu %[len], %[len], -1 \n" + "bgtz %[len], 0b \n" + "dsubu %[tmp0], %[carry], %[borrow] \n" + : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) + : [carry]"r"(carry) + : "memory" + ); + return tmp0; +} + +// Multiply (unsigned) Long A by Long B, accumulating the double- +// length result into the accumulator formed of t0, t1, and t2. 
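For readers less familiar with MIPS inline assembly, the accumulate described above can be written portably. The sketch below is an editor's illustration using GCC's unsigned __int128; the actual MACC/MACC2 that follow use dmultu/mfhi/mflo with explicit sltu carry propagation:

    #include <cstdint>

    // Portable equivalent of MACC: t2:t1:t0 += A * B, with t2:t1:t0 a 192-bit accumulator
    // held in three 64-bit words.
    static inline void macc_portable(uint64_t A, uint64_t B,
                                     uint64_t& t0, uint64_t& t1, uint64_t& t2) {
      unsigned __int128 prod = (unsigned __int128)A * B;              // full 128-bit product
      unsigned __int128 lo   = (unsigned __int128)t0 + (uint64_t)prod;
      t0 = (uint64_t)lo;                                              // low word
      unsigned __int128 mid  = (unsigned __int128)t1
                             + (uint64_t)(prod >> 64)                 // high half of the product
                             + (uint64_t)(lo >> 64);                  // carry out of the low word
      t1 = (uint64_t)mid;                                             // middle word
      t2 += (uint64_t)(mid >> 64);                                    // final carry into the top word
    }
    // MACC2 adds the product twice; portably that is simply two calls to macc_portable().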
+inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { + unsigned long hi, lo, carry = 0, t = 0; + __asm__ __volatile__( + "dmultu %[A], %[B] \n" + "mfhi %[hi] \n" + "mflo %[lo] \n" + "daddu %[t0], %[t0], %[lo] \n" + "sltu %[carry], %[t0], %[lo] \n" + "daddu %[t1], %[t1], %[carry] \n" + "sltu %[t], %[t1], %[carry] \n" + "daddu %[t1], %[t1], %[hi] \n" + "sltu %[carry], %[t1], %[hi] \n" + "or %[carry], %[carry], %[t] \n" + "daddu %[t2], %[t2], %[carry] \n" + : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) + : [A]"r"(A), [B]"r"(B) + : + ); +} + +// As above, but add twice the double-length result into the +// accumulator. +inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { + unsigned long hi, lo, carry = 0, t = 0; + __asm__ __volatile__( + "dmultu %[A], %[B] \n" + "mfhi %[hi] \n" + "mflo %[lo] \n" + "daddu %[t0], %[t0], %[lo] \n" + "sltu %[carry], %[t0], %[lo] \n" + "daddu %[t1], %[t1], %[carry] \n" + "sltu %[t], %[t1], %[carry] \n" + "daddu %[t1], %[t1], %[hi] \n" + "sltu %[carry], %[t1], %[hi] \n" + "or %[carry], %[carry], %[t] \n" + "daddu %[t2], %[t2], %[carry] \n" + "daddu %[t0], %[t0], %[lo] \n" + "sltu %[carry], %[t0], %[lo] \n" + "daddu %[t1], %[t1], %[carry] \n" + "sltu %[t], %[t1], %[carry] \n" + "daddu %[t1], %[t1], %[hi] \n" + "sltu %[carry], %[t1], %[hi] \n" + "or %[carry], %[carry], %[t] \n" + "daddu %[t2], %[t2], %[carry] \n" + : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) + : [A]"r"(A), [B]"r"(B) + : + ); +} + +// Fast Montgomery multiplication. The derivation of the algorithm is +// in A Cryptographic Library for the Motorola DSP56000, +// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. + +static void __attribute__((noinline)) +montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], + unsigned long m[], unsigned long inv, int len) { + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + int i; + + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + + for (i = 0; i < len; i++) { + int j; + for (j = 0; j < i; j++) { + MACC(a[j], b[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + MACC(a[i], b[0], t0, t1, t2); + m[i] = t0 * inv; + MACC(m[i], n[0], t0, t1, t2); + + assert(t0 == 0, "broken Montgomery multiply"); + + t0 = t1; t1 = t2; t2 = 0; + } + + for (i = len; i < 2*len; i++) { + int j; + for (j = i-len+1; j < len; j++) { + MACC(a[j], b[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i-len] = t0; + t0 = t1; t1 = t2; t2 = 0; + } + + while (t0) + t0 = sub(m, n, t0, len); +} + +// Fast Montgomery squaring. This uses asymptotically 25% fewer +// multiplies so it should be up to 25% faster than Montgomery +// multiplication. However, its loop control is more complex and it +// may actually run slower on some machines. 
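The savings claimed above come from the cross terms of a square pairing up, which is exactly the MACC2/single-MACC split used in the loop below. A small self-check of that identity (editor's illustration with arbitrary sample values, not part of the patch):

    #include <cassert>

    int main() {
      // For column i of a*a:  sum_{j=0..i} a[j]*a[i-j]
      //   == 2 * sum_{j < (i+1)/2} a[j]*a[i-j]  +  (a[i/2]*a[i/2] if i is even)
      // i.e. paired terms go through MACC2 and only the diagonal term needs a single MACC.
      unsigned long a[4] = {3, 5, 7, 11};
      for (int i = 0; i < 4; i++) {
        unsigned long full = 0, paired = 0;
        for (int j = 0; j <= i; j++)           full   += a[j] * a[i - j];
        for (int j = 0; j < (i + 1) / 2; j++)  paired += 2 * a[j] * a[i - j];
        if ((i & 1) == 0)                      paired += a[i / 2] * a[i / 2];
        assert(full == paired);
      }
      return 0;
    }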
+ +static void __attribute__((noinline)) +montgomery_square(unsigned long a[], unsigned long n[], + unsigned long m[], unsigned long inv, int len) { + unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + int i; + + assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + + for (i = 0; i < len; i++) { + int j; + int end = (i+1)/2; + for (j = 0; j < end; j++) { + MACC2(a[j], a[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + if ((i & 1) == 0) { + MACC(a[j], a[j], t0, t1, t2); + } + for (; j < i; j++) { + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i] = t0 * inv; + MACC(m[i], n[0], t0, t1, t2); + + assert(t0 == 0, "broken Montgomery square"); + + t0 = t1; t1 = t2; t2 = 0; + } + + for (i = len; i < 2*len; i++) { + int start = i-len+1; + int end = start + (len - start)/2; + int j; + for (j = start; j < end; j++) { + MACC2(a[j], a[i-j], t0, t1, t2); + MACC(m[j], n[i-j], t0, t1, t2); + } + if ((i & 1) == 0) { + MACC(a[j], a[j], t0, t1, t2); + } + for (; j < len; j++) { + MACC(m[j], n[i-j], t0, t1, t2); + } + m[i-len] = t0; + t0 = t1; t1 = t2; t2 = 0; + } + + while (t0) + t0 = sub(m, n, t0, len); +} + +// Swap words in a longword. +static unsigned long swap(unsigned long x) { + return (x << 32) | (x >> 32); +} + +// Copy len longwords from s to d, word-swapping as we go. The +// destination array is reversed. +static void reverse_words(unsigned long *s, unsigned long *d, int len) { + d += len; + while(len-- > 0) { + d--; + *d = swap(*s); + s++; + } +} + +// The threshold at which squaring is advantageous was determined +// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. +// Doesn't seem to be relevant for MIPS64 so we use the same value. +#define MONTGOMERY_SQUARING_THRESHOLD 64 + +void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + assert(len % 2 == 0, "array length in montgomery_multiply must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 8k bytes of stack space. + int total_allocation = longwords * sizeof (unsigned long) * 4; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *b = scratch + 1 * longwords, + *n = scratch + 2 * longwords, + *m = scratch + 3 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)b_ints, b, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); + + reverse_words(m, (unsigned long *)m_ints, longwords); +} + +void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, + jint len, jlong inv, + jint *m_ints) { + assert(len % 2 == 0, "array length in montgomery_square must be even"); + int longwords = len/2; + + // Make very sure we don't use so much space that the stack might + // overflow. 512 jints corresponds to an 16384-bit integer and + // will use here a total of 6k bytes of stack space. 
+ int total_allocation = longwords * sizeof (unsigned long) * 3; + guarantee(total_allocation <= 8192, "must be"); + unsigned long *scratch = (unsigned long *)alloca(total_allocation); + + // Local scratch arrays + unsigned long + *a = scratch + 0 * longwords, + *n = scratch + 1 * longwords, + *m = scratch + 2 * longwords; + + reverse_words((unsigned long *)a_ints, a, longwords); + reverse_words((unsigned long *)n_ints, n, longwords); + + if (len >= MONTGOMERY_SQUARING_THRESHOLD) { + ::montgomery_square(a, n, m, (unsigned long)inv, longwords); + } else { + ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); + } + + reverse_words(m, (unsigned long *)m_ints, longwords); +} diff --git a/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp b/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp new file mode 100644 index 00000000000..aeb797faf9f --- /dev/null +++ b/hotspot/src/cpu/mips/vm/stubGenerator_mips_64.cpp @@ -0,0 +1,2147 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_mips.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/top.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) +//#define a__ ((Assembler*)_masm)-> + +//#ifdef PRODUCT +//#define BLOCK_COMMENT(str) /* nothing */ +//#else +//#define BLOCK_COMMENT(str) __ block_comment(str) +//#endif + +//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") +const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions + +// Stub Code definitions + +static address handle_unsafe_access() { + JavaThread* thread = JavaThread::current(); + address pc = thread->saved_exception_pc(); + // pc is the instruction which we must emulate + // doing a no-op is fine: return garbage from the load + // therefore, compute npc + address npc = (address)((unsigned long)pc + sizeof(unsigned int)); + + // request an async exception + thread->set_pending_unsafe_access_error(); + + // return address of next instruction to execute + return npc; +} + +class StubGenerator: public StubCodeGenerator { + private: + + // ABI mips n64 + // This fig is not MIPS ABI. It is call Java from C ABI. + // Call stubs are used to call Java from C + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S1) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp + // 3 [ result ] <--- a1 + // 4 [ result_type ] <--- a2 + // 5 [ method ] <--- a3 + // 6 [ entry_point ] <--- a4 + // 7 [ parameters ] <--- a5 + // 8 [ parameter_size ] <--- a6 + // 9 [ thread ] <--- a7 + + // + // n64 does not save paras in sp. + // + // [ return_from_Java ] + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + // ... + //-13 [ thread ] + //-12 [ result_type ] <--- a2 + //-11 [ result ] <--- a1 + //-10 [ ] + // -9 [ ptr. to call wrapper ] <--- a0 + // -8 [ S6 ] + // -7 [ S5 ] + // -6 [ S4 ] + // -5 [ S3 ] + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] + // -1 [ BCP(S1) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ] <--- old sp + // + // Find a right place in the call_stub for GP. + // GP will point to the starting point of Interpreter::dispatch_table(itos). + // It should be saved/restored before/after Java calls. + // + enum call_stub_layout { + RA_off = 1, + FP_off = 0, + BCP_off = -1, + LVP_off = -2, + TSR_off = -3, + S1_off = -4, + S3_off = -5, + S4_off = -6, + S5_off = -7, + S6_off = -8, + call_wrapper_off = -9, + result_off = -11, + result_type_off = -12, + thread_off = -13, + total_off = thread_off - 1, + GP_off = -14, + }; + + address generate_call_stub(address& return_address) { + + assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + + // same as in generate_catch_exception()! 
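The a0..a7 assignments in the layout comment above mirror the generic entry signature through which the VM reaches this stub; for reference, it has roughly the following shape in stubRoutines.hpp (the per-register annotations are the editor's, taken from the comment above, not part of the patch):

    // Calls to Java go through a function pointer of this type:
    typedef void (*CallStub)(address   link,               // A0: ptr. to call wrapper
                             intptr_t* result,             // A1
                             BasicType result_type,        // A2
                             Method*   method,             // A3
                             address   entry_point,        // A4
                             intptr_t* parameters,         // A5
                             int       size_of_parameters, // A6
                             TRAPS);                       // A7: thread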
+ + // stub code + // save ra and fp + __ enter(); + // I think 14 is the max gap between argument and callee saved register + __ daddiu(SP, SP, total_off * wordSize); + __ sd(BCP, FP, BCP_off * wordSize); + __ sd(LVP, FP, LVP_off * wordSize); + __ sd(TSR, FP, TSR_off * wordSize); + __ sd(S1, FP, S1_off * wordSize); + __ sd(S3, FP, S3_off * wordSize); + __ sd(S4, FP, S4_off * wordSize); + __ sd(S5, FP, S5_off * wordSize); + __ sd(S6, FP, S6_off * wordSize); + __ sd(A0, FP, call_wrapper_off * wordSize); + __ sd(A1, FP, result_off * wordSize); + __ sd(A2, FP, result_type_off * wordSize); + __ sd(A7, FP, thread_off * wordSize); + __ sd(GP, FP, GP_off * wordSize); + + __ set64(GP, (long)Interpreter::dispatch_table(itos)); + +#ifdef OPT_THREAD + __ move(TREG, A7); +#endif + //add for compressedoops + __ reinit_heapbase(); + +#ifdef ASSERT + // make sure we have no pending exceptions + { + Label L; + __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ bind(L); + } +#endif + + // pass parameters if any + // A5: parameter + // A6: parameter_size + // T0: parameter_size_tmp(--) + // T2: offset(++) + // T3: tmp + Label parameters_done; + // judge if the parameter_size equals 0 + __ beq(A6, R0, parameters_done); + __ delayed()->nop(); + __ dsll(AT, A6, Interpreter::logStackElementSize); + __ dsubu(SP, SP, AT); + __ move(AT, -StackAlignmentInBytes); + __ andr(SP, SP , AT); + // Copy Java parameters in reverse order (receiver last) + // Note that the argument order is inverted in the process + Label loop; + __ move(T0, A6); + __ move(T2, R0); + __ bind(loop); + + // get parameter + __ dsll(T3, T0, LogBytesPerWord); + __ daddu(T3, T3, A5); + __ ld(AT, T3, -wordSize); + __ dsll(T3, T2, LogBytesPerWord); + __ daddu(T3, T3, SP); + __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); + __ daddiu(T2, T2, 1); + __ daddiu(T0, T0, -1); + __ bne(T0, R0, loop); + __ delayed()->nop(); + // advance to next parameter + + // call Java function + __ bind(parameters_done); + + // receiver in V0, methodOop in Rmethod + + __ move(Rmethod, A3); + __ move(Rsender, SP); //set sender sp + __ jalr(A4); + __ delayed()->nop(); + return_address = __ pc(); + + Label common_return; + __ bind(common_return); + + // store result depending on type + // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + __ ld(T0, FP, result_off * wordSize); // result --> T0 + Label is_long, is_float, is_double, exit; + __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 + __ daddiu(T3, T2, (-1) * T_LONG); + __ beq(T3, R0, is_long); + __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); + __ beq(T3, R0, is_float); + __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); + __ beq(T3, R0, is_double); + __ delayed()->nop(); + + // handle T_INT case + __ sd(V0, T0, 0 * wordSize); + __ bind(exit); + + // restore + __ ld(BCP, FP, BCP_off * wordSize); + __ ld(LVP, FP, LVP_off * wordSize); + __ ld(GP, FP, GP_off * wordSize); + __ ld(TSR, FP, TSR_off * wordSize); + + __ ld(S1, FP, S1_off * wordSize); + __ ld(S3, FP, S3_off * wordSize); + __ ld(S4, FP, S4_off * wordSize); + __ ld(S5, FP, S5_off * wordSize); + __ ld(S6, FP, S6_off * wordSize); + + __ leave(); + + // return + __ jr(RA); + __ delayed()->nop(); + + // handle return types different from T_INT + __ bind(is_long); + __ sd(V0, T0, 0 * wordSize); + __ b(exit); + __ 
delayed()->nop(); + + __ bind(is_float); + __ swc1(F0, T0, 0 * wordSize); + __ b(exit); + __ delayed()->nop(); + + __ bind(is_double); + __ sdc1(F0, T0, 0 * wordSize); + __ b(exit); + __ delayed()->nop(); + //FIXME, 1.6 mips version add operation of fpu here + StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); + __ b(common_return); + __ delayed()->nop(); + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + // Note: Usually the parameters are removed by the callee. In case + // of an exception crossing an activation frame boundary, that is + // not the case if the callee is compiled code => need to setup the + // sp. + // + // V0: exception oop + + address generate_catch_exception() { + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + + Register thread = TREG; + + // get thread directly +#ifndef OPT_THREAD + __ ld(thread, FP, thread_off * wordSize); +#endif + +#ifdef ASSERT + // verify that threads correspond + { Label L; + __ get_thread(T8); + __ beq(T8, thread, L); + __ delayed()->nop(); + __ stop("StubRoutines::catch_exception: threads must correspond"); + __ bind(L); + } +#endif + // set pending exception + __ verify_oop(V0); + __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ li(AT, (long)__FILE__); + __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); + __ li(AT, (long)__LINE__); + __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); + __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); + __ delayed()->nop(); + + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + // Contract with Java-level exception handlers: + // V0: exception + // V1: throwing pc + // + // NOTE: At entry of this stub, exception-pc must be on stack !! + + address generate_forward_exception() { + StubCodeMark mark(this, "StubRoutines", "forward exception"); + //Register thread = TREG; + Register thread = TREG; + address start = __ pc(); + + // Upon entry, the sp points to the return address returning into + // Java (interpreted or compiled) code; i.e., the return address + // throwing pc. + // + // Arguments pushed before the runtime call are still on the stack + // but the exception handler will reset the stack pointer -> + // ignore them. A potential result in registers can be ignored as + // well. 
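In outline, the code generated below looks up the handler for the throwing pc, moves the pending exception out of thread-local storage, and jumps to the handler with V0/V1 set as per the contract above. A readability-only sketch with hypothetical stand-in types (editor's illustration; the real stub does all of this at register level):

    struct ThreadStub {                                     // stand-in for JavaThread
      void* pending_exception;
      void* take_pending() { void* e = pending_exception; pending_exception = nullptr; return e; }
    };
    // Stand-in for SharedRuntime::exception_handler_for_return_address.
    typedef void* (*HandlerLookup)(ThreadStub* thread, void* return_pc);

    // Conceptual flow of generate_forward_exception():
    inline void forward_exception_sketch(ThreadStub* thread, void** sp, HandlerLookup lookup,
                                         void*& V0, void*& V1, void*& T9) {
      void* ret_pc = *sp;              // throwing pc, pushed by the caller of the stub
      T9 = lookup(thread, ret_pc);     // compute the exception handler for that pc
      V1 = ret_pc;                     // contract: V1 = throwing pc (popped off the stack)
      V0 = thread->take_pending();     // contract: V0 = exception oop; TLS slot cleared
      // ...the generated code then jumps to T9 with V0/V1 set.
    }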
+ +#ifndef OPT_THREAD + __ get_thread(thread); +#endif +#ifdef ASSERT + // make sure this code is only executed if there is a pending exception + { + Label L; + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into T9 + __ ld(A1, SP, 0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T9, V0); + __ pop(V1); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); + __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); + +#ifdef ASSERT + // make sure exception is set + { + Label L; + __ bne(V0, R0, L); + __ delayed()->nop(); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + + // continue at exception handler (return address removed) + // V0: exception + // T9: exception handler + // V1: throwing pc + __ verify_oop(V0); + __ jr(T9); + __ delayed()->nop(); + + return start; + } + + // The following routine generates a subroutine to throw an + // asynchronous UnknownError when an unsafe access gets a fault that + // could not be reasonably prevented by the programmer. (Example: + // SIGBUS/OBJERR.) + address generate_handler_for_unsafe_access() { + StubCodeMark mark(this, "StubRoutines", "handler_for_unsafe_access"); + address start = __ pc(); + __ push(V0); + __ pushad_except_v0(); // push registers + __ call(CAST_FROM_FN_PTR(address, handle_unsafe_access), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ popad_except_v0(); + __ move(RA, V0); + __ pop(V0); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Non-destructive plausibility checks for oops + // + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); + __ reinit_heapbase(); + __ verify_oop_subroutine(); + address end = __ pc(); + return start; + } + + // + // Generate overlap test for array copy stubs + // + // Input: + // A0 - array1 + // A1 - array2 + // A2 - element count + // + + // use T9 as temp + void array_overlap_test(address no_overlap_target, int log2_elem_size) { + int elem_size = 1 << log2_elem_size; + Address::ScaleFactor sf = Address::times_1; + + switch (log2_elem_size) { + case 0: sf = Address::times_1; break; + case 1: sf = Address::times_2; break; + case 2: sf = Address::times_4; break; + case 3: sf = Address::times_8; break; + } + + __ dsll(AT, A2, sf); + __ daddu(AT, AT, A0); + __ daddiu(T9, AT, -elem_size); + __ dsubu(AT, A1, A0); + __ blez(AT, no_overlap_target); + __ delayed()->nop(); + __ dsubu(AT, A1, T9); + __ bgtz(AT, no_overlap_target); + __ delayed()->nop(); + + // If A0 = 0xf... 
and A1 = 0x0..., than goto no_overlap_target + Label L; + __ bgez(A0, L); + __ delayed()->nop(); + __ bgtz(A1, no_overlap_target); + __ delayed()->nop(); + __ bind(L); + + } + + // + // Generate store check for array + // + // Input: + // T0 - starting address + // T1 - element count + // + // The 2 input registers are overwritten + // + + + void array_store_check(Register tmp) { + assert_different_registers(tmp, AT, T0, T1); + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + Label l_0; + + if (UseConcMarkSweepGC) __ sync(); + + __ set64(tmp, (long)ct->byte_map_base); + + __ dsll(AT, T1, TIMES_OOP); + __ daddu(AT, T0, AT); + __ daddiu(T1, AT, - BytesPerHeapOop); + + __ shr(T0, CardTableModRefBS::card_shift); + __ shr(T1, CardTableModRefBS::card_shift); + + __ dsubu(T1, T1, T0); // end --> cards count + __ bind(l_0); + + __ daddu(AT, tmp, T0); + if (UseLEXT1) { + __ gssbx(R0, AT, T1, 0); + } else { + __ daddu(AT, AT, T1); + __ sb(R0, AT, 0); + } + + __ bgtz(T1, l_0); + __ delayed()->daddiu(T1, T1, - 1); + } + + // Generate code for an array write pre barrier + // + // addr - starting address + // count - element count + // tmp - scratch register + // + // Destroy no registers! + // + void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { + BarrierSet* bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + // With G1, don't generate the call if we statically know that the target in uninitialized + if (!dest_uninitialized) { + __ pushad(); // push registers + if (count == A0) { + if (addr == A1) { + // exactly backwards!! + //__ xchgptr(c_rarg1, c_rarg0); + __ move(AT, A0); + __ move(A0, A1); + __ move(A1, AT); + } else { + __ move(A1, count); + __ move(A0, addr); + } + } else { + __ move(A0, addr); + __ move(A1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2); + __ popad(); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + case BarrierSet::ModRef: + break; + default: + ShouldNotReachHere(); + + } + } + + // + // Generate code for an array write post barrier + // + // Input: + // start - register containing starting address of destination array + // count - elements count + // scratch - scratch register + // + // The input registers are overwritten. + // + void gen_write_ref_array_post_barrier(Register start, Register count, Register scratch) { + assert_different_registers(start, count, scratch, AT); + BarrierSet* bs = Universe::heap()->barrier_set(); + switch (bs->kind()) { + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + __ pushad(); // push registers (overkill) + if (count == A0) { + if (start == A1) { + // exactly backwards!! 
+ //__ xchgptr(c_rarg1, c_rarg0); + __ move(AT, A0); + __ move(A0, A1); + __ move(A1, AT); + } else { + __ move(A1, count); + __ move(A0, start); + } + } else { + __ move(A0, start); + __ move(A1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2); + __ popad(); + } + break; + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + Label L_loop; + const Register end = count; + + if (UseConcMarkSweepGC) __ sync(); + + int64_t disp = (int64_t) ct->byte_map_base; + __ set64(scratch, disp); + + __ lea(end, Address(start, count, TIMES_OOP, 0)); // end == start+count*oop_size + __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive + __ shr(start, CardTableModRefBS::card_shift); + __ shr(end, CardTableModRefBS::card_shift); + __ dsubu(end, end, start); // end --> cards count + + __ daddu(start, start, scratch); + + __ bind(L_loop); + if (UseLEXT1) { + __ gssbx(R0, start, count, 0); + } else { + __ daddu(AT, start, count); + __ sb(R0, AT, 0); + } + __ daddiu(count, count, -1); + __ slt(AT, count, R0); + __ beq(AT, R0, L_loop); + __ delayed()->nop(); + } + break; + default: + ShouldNotReachHere(); + } + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + // Side Effects: + // disjoint_byte_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_byte_copy(). + // + address generate_disjoint_byte_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + + Register tmp1 = T0; + Register tmp2 = T1; + Register tmp3 = T3; + + address start = __ pc(); + + __ push(tmp1); + __ push(tmp2); + __ push(tmp3); + __ move(tmp1, A0); + __ move(tmp2, A1); + __ move(tmp3, A2); + + + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; + Label l_debug; + + __ daddiu(AT, tmp3, -9); //why the number is 9 ? + __ blez(AT, l_9); + __ delayed()->nop(); + + if (!aligned) { + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 1); + __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy + __ delayed()->nop(); + + __ andi(AT, tmp1, 1); + __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes + __ delayed()->nop(); + + __ lb(AT, tmp1, 0); + __ daddiu(tmp1, tmp1, 1); + __ sb(AT, tmp2, 0); + __ daddiu(tmp2, tmp2, 1); + __ daddiu(tmp3, tmp3, -1); + __ bind(l_10); + + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 3); + __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy + __ delayed()->nop(); + + // At this point it is guaranteed that both, from and to have the same alignment mod 4. + + // Copy 2 elements if necessary to align to 4 bytes. 
+ __ andi(AT, tmp1, 3); + __ beq(AT, R0, l_2); + __ delayed()->nop(); + + __ lhu(AT, tmp1, 0); + __ daddiu(tmp1, tmp1, 2); + __ sh(AT, tmp2, 0); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(tmp3, tmp3, -2); + __ bind(l_2); + + // At this point the positions of both, from and to, are at least 4 byte aligned. + + // Copy 4 elements at a time. + // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 7); + __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned + __ delayed()->nop(); + + // Copy a 4 elements if necessary to align to 8 bytes. + __ andi(AT, tmp1, 7); + __ beq(AT, R0, l_7); + __ delayed()->nop(); + + __ lw(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -4); + __ sw(AT, tmp2, 0); + { // FasterArrayCopy + __ daddiu(tmp1, tmp1, 4); + __ daddiu(tmp2, tmp2, 4); + } + } + + __ bind(l_7); + + // Copy 4 elements at a time; either the loads or the stores can + // be unaligned if aligned == false. + + { // FasterArrayCopy + __ daddiu(AT, tmp3, -7); + __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain + __ delayed()->nop(); + + __ bind(l_8); + // For Loongson, there is 128-bit memory access. TODO + __ ld(AT, tmp1, 0); + __ sd(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 8); + __ daddiu(tmp2, tmp2, 8); + __ daddiu(tmp3, tmp3, -8); + __ daddiu(AT, tmp3, -8); + __ bgez(AT, l_8); + __ delayed()->nop(); + } + __ bind(l_6); + + // copy 4 bytes at a time + { // FasterArrayCopy + __ daddiu(AT, tmp3, -3); + __ blez(AT, l_1); + __ delayed()->nop(); + + __ bind(l_3); + __ lw(AT, tmp1, 0); + __ sw(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 4); + __ daddiu(tmp2, tmp2, 4); + __ daddiu(tmp3, tmp3, -4); + __ daddiu(AT, tmp3, -4); + __ bgez(AT, l_3); + __ delayed()->nop(); + + } + + // do 2 bytes copy + __ bind(l_1); + { + __ daddiu(AT, tmp3, -1); + __ blez(AT, l_9); + __ delayed()->nop(); + + __ bind(l_5); + __ lhu(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -2); + __ sh(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 2); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(AT, tmp3, -2); + __ bgez(AT, l_5); + __ delayed()->nop(); + } + + //do 1 element copy--byte + __ bind(l_9); + __ beq(R0, tmp3, l_4); + __ delayed()->nop(); + + { + __ bind(l_11); + __ lb(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -1); + __ sb(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 1); + __ daddiu(tmp2, tmp2, 1); + __ daddiu(AT, tmp3, -1); + __ bgez(AT, l_11); + __ delayed()->nop(); + } + + __ bind(l_4); + __ pop(tmp3); + __ pop(tmp2); + __ pop(tmp1); + + __ jr(RA); + __ delayed()->nop(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // A0 - source array address + // A1 - destination array address + // A2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, + // we let the hardware handle it. The one to eight bytes within words, + // dwords or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_byte_copy(bool aligned, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; + Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; + + address nooverlap_target = aligned ? 
+ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : + StubRoutines::jbyte_disjoint_arraycopy(); + + array_overlap_test(nooverlap_target, 0); + + const Register from = A0; // source array address + const Register to = A1; // destination array address + const Register count = A2; // elements count + const Register end_from = T3; // source array end address + const Register end_to = T0; // destination array end address + const Register end_count = T1; // destination array end address + + __ push(end_from); + __ push(end_to); + __ push(end_count); + __ push(T8); + + // copy from high to low + __ move(end_count, count); + __ daddu(end_from, from, end_count); + __ daddu(end_to, to, end_count); + + // If end_from and end_to has differante alignment, unaligned copy is performed. + __ andi(AT, end_from, 3); + __ andi(T8, end_to, 3); + __ bne(AT, T8, l_copy_byte); + __ delayed()->nop(); + + // First deal with the unaligned data at the top. + __ bind(l_unaligned); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + + __ andi(AT, end_from, 3); + __ bne(AT, R0, l_from_unaligned); + __ delayed()->nop(); + + __ andi(AT, end_to, 3); + __ beq(AT, R0, l_4_bytes_aligned); + __ delayed()->nop(); + + __ bind(l_from_unaligned); + __ lb(AT, end_from, -1); + __ sb(AT, end_to, -1); + __ daddiu(end_from, end_from, -1); + __ daddiu(end_to, end_to, -1); + __ daddiu(end_count, end_count, -1); + __ b(l_unaligned); + __ delayed()->nop(); + + // now end_to, end_from point to 4-byte aligned high-ends + // end_count contains byte count that is not copied. + // copy 4 bytes at a time + __ bind(l_4_bytes_aligned); + + __ move(T8, end_count); + __ daddiu(AT, end_count, -3); + __ blez(AT, l_copy_suffix); + __ delayed()->nop(); + + //__ andi(T8, T8, 3); + __ lea(end_from, Address(end_from, -4)); + __ lea(end_to, Address(end_to, -4)); + + __ dsrl(end_count, end_count, 2); + __ align(16); + __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes + __ lw(AT, end_from, 0); + __ sw(AT, end_to, 0); + __ addiu(end_from, end_from, -4); + __ addiu(end_to, end_to, -4); + __ addiu(end_count, end_count, -1); + __ bne(end_count, R0, l_copy_4_bytes_loop); + __ delayed()->nop(); + + __ b(l_copy_suffix); + __ delayed()->nop(); + // copy dwords aligned or not with repeat move + // l_copy_suffix + // copy suffix (0-3 bytes) + __ bind(l_copy_suffix); + __ andi(T8, T8, 3); + __ beq(T8, R0, l_exit); + __ delayed()->nop(); + __ addiu(end_from, end_from, 3); + __ addiu(end_to, end_to, 3); + __ bind(l_copy_suffix_loop); + __ lb(AT, end_from, 0); + __ sb(AT, end_to, 0); + __ addiu(end_from, end_from, -1); + __ addiu(end_to, end_to, -1); + __ addiu(T8, T8, -1); + __ bne(T8, R0, l_copy_suffix_loop); + __ delayed()->nop(); + + __ bind(l_copy_byte); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + __ lb(AT, end_from, -1); + __ sb(AT, end_to, -1); + __ daddiu(end_from, end_from, -1); + __ daddiu(end_to, end_to, -1); + __ daddiu(end_count, end_count, -1); + __ b(l_copy_byte); + __ delayed()->nop(); + + __ bind(l_exit); + __ pop(T8); + __ pop(end_count); + __ pop(end_to); + __ pop(end_from); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Generate stub for disjoint short copy. If "aligned" is true, the + // "from" and "to" addresses are assumed to be heapword aligned. + // + // Arguments for generated stub: + // from: A0 + // to: A1 + // elm.count: A2 treated as signed + // one element: 2 bytes + // + // Strategy for aligned==true: + // + // If length <= 9: + // 1. copy 1 elements at a time (l_5) + // + // If length > 9: + // 1. 
copy 4 elements at a time until less than 4 elements are left (l_7) + // 2. copy 2 elements at a time until less than 2 elements are left (l_6) + // 3. copy last element if one was left in step 2. (l_1) + // + // + // Strategy for aligned==false: + // + // If length <= 9: same as aligned==true case + // + // If length > 9: + // 1. continue with step 7. if the alignment of from and to mod 4 + // is different. + // 2. align from and to to 4 bytes by copying 1 element if necessary + // 3. at l_2 from and to are 4 byte aligned; continue with + // 6. if they cannot be aligned to 8 bytes because they have + // got different alignment mod 8. + // 4. at this point we know that both, from and to, have the same + // alignment mod 8, now copy one element if necessary to get + // 8 byte alignment of from and to. + // 5. copy 4 elements at a time until less than 4 elements are + // left; depending on step 3. all load/stores are aligned. + // 6. copy 2 elements at a time until less than 2 elements are + // left. (l_6) + // 7. copy 1 element at a time. (l_5) + // 8. copy last element if one was left in step 6. (l_1) + + address generate_disjoint_short_copy(bool aligned, const char * name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + + Register tmp1 = T0; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T8; + Register tmp5 = T9; + Register tmp6 = T2; + + address start = __ pc(); + + __ push(tmp1); + __ push(tmp2); + __ push(tmp3); + __ move(tmp1, A0); + __ move(tmp2, A1); + __ move(tmp3, A2); + + Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; + Label l_debug; + // don't try anything fancy if arrays don't have many elements + __ daddiu(AT, tmp3, -23); + __ blez(AT, l_14); + __ delayed()->nop(); + // move push here + __ push(tmp4); + __ push(tmp5); + __ push(tmp6); + + if (!aligned) { + __ xorr(AT, A0, A1); + __ andi(AT, AT, 1); + __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? + __ delayed()->nop(); + + __ xorr(AT, A0, A1); + __ andi(AT, AT, 3); + __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy + __ delayed()->nop(); + + // At this point it is guaranteed that both, from and to have the same alignment mod 4. + + // Copy 1 element if necessary to align to 4 bytes. + __ andi(AT, A0, 3); + __ beq(AT, R0, l_2); + __ delayed()->nop(); + + __ lhu(AT, tmp1, 0); + __ daddiu(tmp1, tmp1, 2); + __ sh(AT, tmp2, 0); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(tmp3, tmp3, -1); + __ bind(l_2); + + // At this point the positions of both, from and to, are at least 4 byte aligned. + + // Copy 4 elements at a time. + // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. + __ xorr(AT, tmp1, tmp2); + __ andi(AT, AT, 7); + __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned + __ delayed()->nop(); + + // Copy a 2-element word if necessary to align to 8 bytes. + __ andi(AT, tmp1, 7); + __ beq(AT, R0, l_7); + __ delayed()->nop(); + + __ lw(AT, tmp1, 0); + __ daddiu(tmp3, tmp3, -2); + __ sw(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 4); + __ daddiu(tmp2, tmp2, 4); + }// end of if (!aligned) + + __ bind(l_7); + // At this time the position of both, from and to, are at least 8 byte aligned. + // Copy 8 elemnets at a time. + // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
+    __ xorr(AT, tmp1, tmp2);
+    __ andi(AT, AT, 15);
+    __ bne(AT, R0, l_9);
+    __ delayed()->nop();
+
+    // Copy a 4-element word if necessary to align to 16 bytes.
+    __ andi(AT, tmp1, 15);
+    __ beq(AT, R0, l_10);
+    __ delayed()->nop();
+
+    __ ld(AT, tmp1, 0);
+    __ daddiu(tmp3, tmp3, -4);
+    __ sd(AT, tmp2, 0);
+    __ daddiu(tmp1, tmp1, 8);
+    __ daddiu(tmp2, tmp2, 8);
+
+    __ bind(l_10);
+
+    // Copy 8 elements at a time; either the loads or the stores can
+    // be unaligned if aligned == false.
+
+    { // FasterArrayCopy
+      __ bind(l_11);
+      // For Loongson, the 128-bit memory access instructions are gslq/gssq
+      if (UseLEXT1) {
+        __ gslq(AT, tmp4, tmp1, 0);
+        __ gslq(tmp5, tmp6, tmp1, 16);
+        __ daddiu(tmp1, tmp1, 32);
+        __ daddiu(tmp2, tmp2, 32);
+        __ gssq(AT, tmp4, tmp2, -32);
+        __ gssq(tmp5, tmp6, tmp2, -16);
+      } else {
+        __ ld(AT, tmp1, 0);
+        __ ld(tmp4, tmp1, 8);
+        __ ld(tmp5, tmp1, 16);
+        __ ld(tmp6, tmp1, 24);
+        __ daddiu(tmp1, tmp1, 32);
+        __ sd(AT, tmp2, 0);
+        __ sd(tmp4, tmp2, 8);
+        __ sd(tmp5, tmp2, 16);
+        __ sd(tmp6, tmp2, 24);
+        __ daddiu(tmp2, tmp2, 32);
+      }
+      __ daddiu(tmp3, tmp3, -16);
+      __ daddiu(AT, tmp3, -16);
+      __ bgez(AT, l_11);
+      __ delayed()->nop();
+    }
+    __ bind(l_9);
+
+    // Copy 4 elements at a time; either the loads or the stores can
+    // be unaligned if aligned == false.
+    { // FasterArrayCopy
+      __ daddiu(AT, tmp3, -15); // the loop is unrolled 4 times, so at least 16 elements must remain
+      __ blez(AT, l_4); // go to the element-wise tail copy if fewer than 16 elements remain
+      __ delayed()->nop();
+
+      __ bind(l_8);
+      __ ld(AT, tmp1, 0);
+      __ ld(tmp4, tmp1, 8);
+      __ ld(tmp5, tmp1, 16);
+      __ ld(tmp6, tmp1, 24);
+      __ sd(AT, tmp2, 0);
+      __ sd(tmp4, tmp2, 8);
+      __ sd(tmp5, tmp2, 16);
+      __ daddiu(tmp1, tmp1, 32);
+      __ daddiu(tmp2, tmp2, 32);
+      __ daddiu(tmp3, tmp3, -16);
+      __ daddiu(AT, tmp3, -16);
+      __ bgez(AT, l_8);
+      __ delayed()->sd(tmp6, tmp2, -8);
+    }
+    __ bind(l_6);
+
+    // copy 2 elements at a time
+    { // FasterArrayCopy
+      __ daddiu(AT, tmp3, -7);
+      __ blez(AT, l_4);
+      __ delayed()->nop();
+
+      __ bind(l_3);
+      __ lw(AT, tmp1, 0);
+      __ lw(tmp4, tmp1, 4);
+      __ lw(tmp5, tmp1, 8);
+      __ lw(tmp6, tmp1, 12);
+      __ sw(AT, tmp2, 0);
+      __ sw(tmp4, tmp2, 4);
+      __ sw(tmp5, tmp2, 8);
+      __ daddiu(tmp1, tmp1, 16);
+      __ daddiu(tmp2, tmp2, 16);
+      __ daddiu(tmp3, tmp3, -8);
+      __ daddiu(AT, tmp3, -8);
+      __ bgez(AT, l_3);
+      __ delayed()->sw(tmp6, tmp2, -4);
+    }
+
+    __ bind(l_1);
+    // do single element copy (16 bit), can this happen?
+ { // FasterArrayCopy + __ daddiu(AT, tmp3, -3); + __ blez(AT, l_4); + __ delayed()->nop(); + + __ bind(l_5); + __ lhu(AT, tmp1, 0); + __ lhu(tmp4, tmp1, 2); + __ lhu(tmp5, tmp1, 4); + __ lhu(tmp6, tmp1, 6); + __ sh(AT, tmp2, 0); + __ sh(tmp4, tmp2, 2); + __ sh(tmp5, tmp2, 4); + __ daddiu(tmp1, tmp1, 8); + __ daddiu(tmp2, tmp2, 8); + __ daddiu(tmp3, tmp3, -4); + __ daddiu(AT, tmp3, -4); + __ bgez(AT, l_5); + __ delayed()->sh(tmp6, tmp2, -2); + } + // single element + __ bind(l_4); + + __ pop(tmp6); + __ pop(tmp5); + __ pop(tmp4); + + __ bind(l_14); + { // FasterArrayCopy + __ beq(R0, tmp3, l_13); + __ delayed()->nop(); + + __ bind(l_12); + __ lhu(AT, tmp1, 0); + __ sh(AT, tmp2, 0); + __ daddiu(tmp1, tmp1, 2); + __ daddiu(tmp2, tmp2, 2); + __ daddiu(tmp3, tmp3, -1); + __ daddiu(AT, tmp3, -1); + __ bgez(AT, l_12); + __ delayed()->nop(); + } + + __ bind(l_13); + __ pop(tmp3); + __ pop(tmp2); + __ pop(tmp1); + + __ jr(RA); + __ delayed()->nop(); + + __ bind(l_debug); + __ stop("generate_disjoint_short_copy should not reach here"); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we + // let the hardware handle it. The two or four words within dwords + // or qwords that span cache line boundaries will still be loaded + // and stored atomically. + // + address generate_conjoint_short_copy(bool aligned, const char *name) { + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; + + address nooverlap_target = aligned ? + StubRoutines::arrayof_jshort_disjoint_arraycopy() : + StubRoutines::jshort_disjoint_arraycopy(); + + array_overlap_test(nooverlap_target, 1); + + const Register from = A0; // source array address + const Register to = A1; // destination array address + const Register count = A2; // elements count + const Register end_from = T3; // source array end address + const Register end_to = T0; // destination array end address + const Register end_count = T1; // destination array end address + + __ push(end_from); + __ push(end_to); + __ push(end_count); + __ push(T8); + + // copy from high to low + __ move(end_count, count); + __ sll(AT, end_count, Address::times_2); + __ daddu(end_from, from, AT); + __ daddu(end_to, to, AT); + + // If end_from and end_to has differante alignment, unaligned copy is performed. + __ andi(AT, end_from, 3); + __ andi(T8, end_to, 3); + __ bne(AT, T8, l_copy_short); + __ delayed()->nop(); + + // First deal with the unaligned data at the top. + __ bind(l_unaligned); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + + __ andi(AT, end_from, 3); + __ bne(AT, R0, l_from_unaligned); + __ delayed()->nop(); + + __ andi(AT, end_to, 3); + __ beq(AT, R0, l_4_bytes_aligned); + __ delayed()->nop(); + + // Copy 1 element if necessary to align to 4 bytes. + __ bind(l_from_unaligned); + __ lhu(AT, end_from, -2); + __ sh(AT, end_to, -2); + __ daddiu(end_from, end_from, -2); + __ daddiu(end_to, end_to, -2); + __ daddiu(end_count, end_count, -1); + __ b(l_unaligned); + __ delayed()->nop(); + + // now end_to, end_from point to 4-byte aligned high-ends + // end_count contains byte count that is not copied. 
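For orientation: the conjoint copy above walks from the high addresses downward so that a destination overlapping the source at a higher address is not clobbered before it is read. A minimal C++ sketch of the same idea, illustrative only (the helper name is made up and this is not part of the generated stub):

  #include <stddef.h>
  #include <stdint.h>

  // Backward element-wise copy: safe when 'to' overlaps 'from' at a higher
  // address, which is exactly the case the conjoint stubs have to handle.
  static void conjoint_jshort_copy_sketch(const uint16_t* from, uint16_t* to, size_t count) {
    const uint16_t* end_from = from + count;  // one past the last source element
    uint16_t*       end_to   = to   + count;  // one past the last destination element
    while (count-- > 0) {
      *--end_to = *--end_from;                // copy high-to-low, like the stub's end_from/end_to walk
    }
  }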
+ // copy 4 bytes at a time + __ bind(l_4_bytes_aligned); + + __ daddiu(AT, end_count, -1); + __ blez(AT, l_copy_short); + __ delayed()->nop(); + + __ lw(AT, end_from, -4); + __ sw(AT, end_to, -4); + __ addiu(end_from, end_from, -4); + __ addiu(end_to, end_to, -4); + __ addiu(end_count, end_count, -2); + __ b(l_4_bytes_aligned); + __ delayed()->nop(); + + // copy 1 element at a time + __ bind(l_copy_short); + __ beq(end_count, R0, l_exit); + __ delayed()->nop(); + __ lhu(AT, end_from, -2); + __ sh(AT, end_to, -2); + __ daddiu(end_from, end_from, -2); + __ daddiu(end_to, end_to, -2); + __ daddiu(end_count, end_count, -1); + __ b(l_copy_short); + __ delayed()->nop(); + + __ bind(l_exit); + __ pop(T8); + __ pop(end_count); + __ pop(end_to); + __ pop(end_from); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). + // + address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_3, l_4, l_5, l_6, l_7; + StubCodeMark mark(this, "StubRoutines", name); + + __ align(CodeEntryAlignment); + address start = __ pc(); + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + if (is_oop) { + gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); + } + + if(!aligned) { + __ xorr(AT, T3, T0); + __ andi(AT, AT, 7); + __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time + __ delayed()->nop(); + + __ andi(AT, T3, 7); + __ beq(AT, R0, l_6); //copy 2 elements each time + __ delayed()->nop(); + + __ lw(AT, T3, 0); + __ daddiu(T1, T1, -1); + __ sw(AT, T0, 0); + __ daddiu(T3, T3, 4); + __ daddiu(T0, T0, 4); + } + + { + __ bind(l_6); + __ daddiu(AT, T1, -1); + __ blez(AT, l_5); + __ delayed()->nop(); + + __ bind(l_7); + __ ld(AT, T3, 0); + __ sd(AT, T0, 0); + __ daddiu(T3, T3, 8); + __ daddiu(T0, T0, 8); + __ daddiu(T1, T1, -2); + __ daddiu(AT, T1, -2); + __ bgez(AT, l_7); + __ delayed()->nop(); + } + + __ bind(l_5); + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_3); + __ lw(AT, T3, 0); + __ sw(AT, T0, 0); + __ addiu(T3, T3, 4); + __ addiu(T0, T0, 4); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_3); + __ delayed()->nop(); + + // exit + __ bind(l_4); + if (is_oop) { + gen_write_ref_array_post_barrier(A1, A2, T1); + } + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' 
and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_2, l_4; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + address nooverlap_target; + + if (is_oop) { + nooverlap_target = aligned ? + StubRoutines::arrayof_oop_disjoint_arraycopy() : + StubRoutines::oop_disjoint_arraycopy(); + } else { + nooverlap_target = aligned ? + StubRoutines::arrayof_jint_disjoint_arraycopy() : + StubRoutines::jint_disjoint_arraycopy(); + } + + array_overlap_test(nooverlap_target, 2); + + if (is_oop) { + gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); + } + + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + // T3: source array address + // T0: destination array address + // T1: element count + + __ sll(AT, T1, Address::times_4); + __ addu(AT, T3, AT); + __ daddiu(T3, AT, -4); + __ sll(AT, T1, Address::times_4); + __ addu(AT, T0, AT); + __ daddiu(T0, AT, -4); + + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_2); + __ lw(AT, T3, 0); + __ sw(AT, T0, 0); + __ addiu(T3, T3, -4); + __ addiu(T0, T0, -4); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_2); + __ delayed()->nop(); + + __ bind(l_4); + if (is_oop) { + gen_write_ref_array_post_barrier(A1, A2, T1); + } + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). 
+ // + address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_3, l_4; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + if (is_oop) { + gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); + } + + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + // T3: source array address + // T0: destination array address + // T1: element count + + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_3); + __ ld(AT, T3, 0); + __ sd(AT, T0, 0); + __ addiu(T3, T3, 8); + __ addiu(T0, T0, 8); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_3); + __ delayed()->nop(); + + // exit + __ bind(l_4); + if (is_oop) { + gen_write_ref_array_post_barrier(A1, A2, T1); + } + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { + Label l_2, l_4; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + address nooverlap_target; + + if (is_oop) { + nooverlap_target = aligned ? + StubRoutines::arrayof_oop_disjoint_arraycopy() : + StubRoutines::oop_disjoint_arraycopy(); + } else { + nooverlap_target = aligned ? 
+ StubRoutines::arrayof_jlong_disjoint_arraycopy() : + StubRoutines::jlong_disjoint_arraycopy(); + } + + array_overlap_test(nooverlap_target, 3); + + if (is_oop) { + gen_write_ref_array_pre_barrier(A1, A2, dest_uninitialized); + } + + __ push(T3); + __ push(T0); + __ push(T1); + __ push(T8); + __ push(T9); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + + __ sll(AT, T1, Address::times_8); + __ addu(AT, T3, AT); + __ daddiu(T3, AT, -8); + __ sll(AT, T1, Address::times_8); + __ addu(AT, T0, AT); + __ daddiu(T0, AT, -8); + + __ beq(T1, R0, l_4); + __ delayed()->nop(); + + __ align(16); + __ bind(l_2); + __ ld(AT, T3, 0); + __ sd(AT, T0, 0); + __ addiu(T3, T3, -8); + __ addiu(T0, T0, -8); + __ addiu(T1, T1, -1); + __ bne(T1, R0, l_2); + __ delayed()->nop(); + + // exit + __ bind(l_4); + if (is_oop) { + gen_write_ref_array_post_barrier(A1, A2, T1); + } + __ pop(T9); + __ pop(T8); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + //FIXME + address generate_disjoint_long_copy(bool aligned, const char *name) { + Label l_1, l_2; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + __ push(T3); + __ push(T0); + __ push(T1); + __ b(l_2); + __ delayed()->nop(); + __ align(16); + __ bind(l_1); + __ ld(AT, T3, 0); + __ sd (AT, T0, 0); + __ addiu(T3, T3, 8); + __ addiu(T0, T0, 8); + __ bind(l_2); + __ addiu(T1, T1, -1); + __ bgez(T1, l_1); + __ delayed()->nop(); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + + address generate_conjoint_long_copy(bool aligned, const char *name) { + Label l_1, l_2; + StubCodeMark mark(this, "StubRoutines", name); + __ align(CodeEntryAlignment); + address start = __ pc(); + address nooverlap_target = aligned ? 
+ StubRoutines::arrayof_jlong_disjoint_arraycopy() : + StubRoutines::jlong_disjoint_arraycopy(); + array_overlap_test(nooverlap_target, 3); + + __ push(T3); + __ push(T0); + __ push(T1); + + __ move(T1, A2); + __ move(T3, A0); + __ move(T0, A1); + __ sll(AT, T1, Address::times_8); + __ addu(AT, T3, AT); + __ daddiu(T3, AT, -8); + __ sll(AT, T1, Address::times_8); + __ addu(AT, T0, AT); + __ daddiu(T0, AT, -8); + + __ b(l_2); + __ delayed()->nop(); + __ align(16); + __ bind(l_1); + __ ld(AT, T3, 0); + __ sd (AT, T0, 0); + __ addiu(T3, T3, -8); + __ addiu(T0, T0,-8); + __ bind(l_2); + __ addiu(T1, T1, -1); + __ bgez(T1, l_1); + __ delayed()->nop(); + __ pop(T1); + __ pop(T0); + __ pop(T3); + __ jr(RA); + __ delayed()->nop(); + return start; + } + + void generate_arraycopy_stubs() { + if (UseCompressedOops) { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, + "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, + "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, + "oop_disjoint_arraycopy_uninit", true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, + "oop_arraycopy_uninit", true); + } else { + StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, + "oop_disjoint_arraycopy"); + StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, + "oop_arraycopy"); + StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, + "oop_disjoint_arraycopy_uninit", true); + StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, + "oop_arraycopy_uninit", true); + } + + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); + StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); + StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); + + StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); + StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); + StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); + + // We don't generate specialized code for HeapWord-aligned source + // arrays, so just use the code we've already generated + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; + StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; + + StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; + StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; + + StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; + StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; + + StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; + StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; + + StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; + StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; + + 
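A note on the UseCompressedOops split above: with compressed oops every reference in an object array is a 32-bit narrowOop, so the int-flavoured copy stubs can be reused for oop arrays; otherwise each element is a full 64-bit pointer and the long-flavoured stubs are used. A trivial sketch of that size choice (illustrative only, not part of the patch):

  #include <stddef.h>
  #include <stdint.h>

  // Element width the oop arraycopy stubs operate on (illustrative sketch).
  static size_t oop_copy_element_size_sketch(bool use_compressed_oops) {
    return use_compressed_oops ? sizeof(uint32_t)   // narrowOop
                               : sizeof(uint64_t);  // full oop pointer
  }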
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; + StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; + } + + // add a function to implement SafeFetch32 and SafeFetchN + void generate_safefetch(const char* name, int size, address* entry, + address* fault_pc, address* continuation_pc) { + // safefetch signatures: + // int SafeFetch32(int* adr, int errValue); + // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); + // + // arguments: + // A0 = adr + // A1 = errValue + // + // result: + // PPC_RET = *adr or errValue + + StubCodeMark mark(this, "StubRoutines", name); + + // Entry point, pc or function descriptor. + *entry = __ pc(); + + // Load *adr into A1, may fault. + *fault_pc = __ pc(); + switch (size) { + case 4: + // int32_t + __ lw(A1, A0, 0); + break; + case 8: + // int64_t + __ ld(A1, A0, 0); + break; + default: + ShouldNotReachHere(); + } + + // return errValue or *adr + *continuation_pc = __ pc(); + __ addu(V0,A1,R0); + __ jr(RA); + __ delayed()->nop(); + } + + +#undef __ +#define __ masm-> + + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Since we need to preserve callee-saved values (currently + // only for C2, but done for C1 as well) we need a callee-saved oop + // map and therefore have to make these stubs into RuntimeStubs + // rather than BufferBlobs. If the compiler needs all registers to + // be preserved between the fault point and the exception handler + // then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + address generate_throw_exception(const char* name, + address runtime_entry, + bool restore_saved_exception_pc) { + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved. 
+ enum layout { + thread_off, // last_java_sp + S7_off, // callee saved register sp + 1 + S6_off, // callee saved register sp + 2 + S5_off, // callee saved register sp + 3 + S4_off, // callee saved register sp + 4 + S3_off, // callee saved register sp + 5 + S2_off, // callee saved register sp + 6 + S1_off, // callee saved register sp + 7 + S0_off, // callee saved register sp + 8 + FP_off, + ret_address, + framesize + }; + + int insts_size = 2048; + int locs_size = 32; + + // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, + // NULL, NULL, NULL, false, NULL, name, false); + CodeBuffer code (name , insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + + address start = __ pc(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM +#ifndef OPT_THREAD + Register java_thread = TREG; + __ get_thread(java_thread); +#else + Register java_thread = TREG; +#endif + if (restore_saved_exception_pc) { + __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); + } + + __ enter(); // required for proper stackwalking of RuntimeStub frame + + __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog + __ sd(S0, SP, S0_off * wordSize); + __ sd(S1, SP, S1_off * wordSize); + __ sd(S2, SP, S2_off * wordSize); + __ sd(S3, SP, S3_off * wordSize); + __ sd(S4, SP, S4_off * wordSize); + __ sd(S5, SP, S5_off * wordSize); + __ sd(S6, SP, S6_off * wordSize); + __ sd(S7, SP, S7_off * wordSize); + + int frame_complete = __ pc() - start; + // push java thread (becomes first argument of C function) + __ sd(java_thread, SP, thread_off * wordSize); + if (java_thread != A0) + __ move(A0, java_thread); + + // Set up last_Java_sp and last_Java_fp + __ set_last_Java_frame(java_thread, SP, FP, NULL); + // Align stack + __ set64(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + + __ relocate(relocInfo::internal_pc_type); + { + intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; + __ patchable_set48(AT, save_pc); + } + __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); + + // Call runtime + __ call(runtime_entry); + __ delayed()->nop(); + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + oop_maps->add_gc_map(__ offset(), map); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. +#ifndef OPT_THREAD + __ get_thread(java_thread); +#endif + + __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); + __ reset_last_Java_frame(java_thread, true); + + // Restore callee save registers. 
This must be done after resetting the Java frame + __ ld(S0, SP, S0_off * wordSize); + __ ld(S1, SP, S1_off * wordSize); + __ ld(S2, SP, S2_off * wordSize); + __ ld(S3, SP, S3_off * wordSize); + __ ld(S4, SP, S4_off * wordSize); + __ ld(S5, SP, S5_off * wordSize); + __ ld(S6, SP, S6_off * wordSize); + __ ld(S7, SP, S7_off * wordSize); + + // discard arguments + __ move(SP, FP); // epilog + __ pop(FP); + // check for pending exceptions +#ifdef ASSERT + Label L; + __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ should_not_reach_here(); + __ bind(L); +#endif //ASSERT + __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, + &code, + frame_complete, + framesize, + oop_maps, false); + return stub->entry_point(); + } + + // Initialization + void generate_initial() { + // Generates all stubs and initializes the entry points + + //------------------------------------------------------------- + //----------------------------------------------------------- + // entry points that exist in all platforms + // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller + // than the disadvantage of having a much more complicated generator structure. + // See also comment in stubRoutines.hpp. + StubRoutines::_forward_exception_entry = generate_forward_exception(); + StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); + // is referenced by megamorphic call + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); + + StubRoutines::_throw_StackOverflowError_entry = generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), false); + } + + void generate_all() { + // Generates all stubs and initializes the entry points + + // These entry points require SharedInfo::stack0 to be set up in + // non-core builds and need to be relocatable, so they each + // fabricate a RuntimeStub internally. + StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); + + StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); + + StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); + + // entry points that are platform specific + + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); +#ifndef CORE + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); +#endif + + // Safefetch stubs. 
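The SafeFetch stubs registered below return the loaded value, or the caller-supplied error value when the load faults: the VM's signal handler recognizes the recorded fault pc and resumes at the continuation pc, where the error value (still sitting in A1) is moved to V0. A usage sketch, assuming the standard SafeFetch32() wrapper from stubRoutines.hpp (the helper name here is made up):

  // Probe a possibly-unmapped address without crashing the VM (illustrative).
  static int peek_int_sketch(int* addr) {
    // If the load inside the stub faults, -1 is returned instead of *addr.
    return SafeFetch32(addr, -1);
  }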
+ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, + &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + + if (UseMontgomeryMultiplyIntrinsic) { + StubRoutines::_montgomeryMultiply + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); + } + if (UseMontgomerySquareIntrinsic) { + StubRoutines::_montgomerySquare + = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); + } + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + if (all) { + generate_all(); + } else { + generate_initial(); + } + } +}; // end class declaration + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} diff --git a/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp new file mode 100644 index 00000000000..733a48b8897 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" + +// a description of how to extend it, see the stubRoutines.hpp file. + +//find the last fp value +address StubRoutines::gs2::_call_stub_compiled_return = NULL; diff --git a/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp new file mode 100644 index 00000000000..920c08844e1 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/stubRoutines_mips_64.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. + +static bool returns_to_call_stub(address return_pc){ + return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); +} + +enum platform_dependent_constants { + code_size1 = 20000, // simply increase if too small (assembler will crash if too small) + code_size2 = 40000 // simply increase if too small (assembler will crash if too small) +}; + +class gs2 { + friend class StubGenerator; + friend class VMStructs; + private: + // If we call compiled code directly from the call stub we will + // need to adjust the return back to the call stub to a specialized + // piece of code that can handle compiled results and cleaning the fpu + // stack. The variable holds that location. + static address _call_stub_compiled_return; + +public: + // Call back points for traps in compiled code + static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } + static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } + +}; + +#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP diff --git a/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp b/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp new file mode 100644 index 00000000000..a83c3728f87 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateInterpreterGenerator_mips.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP +#define CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP + + protected: + + void generate_fixed_frame(bool native_call); + + // address generate_asm_interpreter_entry(bool synchronized); + +#endif // CPU_MIPS_VM_TEMPLATEINTERPRETERGENERATOR_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp b/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp new file mode 100644 index 00000000000..204f1b2f216 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateInterpreter_mips.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP +#define CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP + + + protected: + + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreter to get the VM to print out the size. + // Max size with JVMTI + // The sethi() instruction generates lots more instructions when shell + // stack limit is unlimited, so that's why this is much bigger. + const static int InterpreterCodeSize = 500 * K; + +#endif // CPU_MIPS_VM_TEMPLATEINTERPRETER_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp b/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp new file mode 100644 index 00000000000..0cc5d33070f --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateInterpreter_mips_64.cpp @@ -0,0 +1,2306 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterGenerator.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateTable.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/methodData.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+
+#define __ _masm->
+
+#define A0 RA0
+#define A1 RA1
+#define A2 RA2
+#define A3 RA3
+#define A4 RA4
+#define A5 RA5
+#define A6 RA6
+#define A7 RA7
+#define T0 RT0
+#define T1 RT1
+#define T2 RT2
+#define T3 RT3
+#define T8 RT8
+#define T9 RT9
+
+#ifndef CC_INTERP
+
+// asm based interpreter deoptimization helpers
+int AbstractInterpreter::size_activation(int max_stack,
+                                         int temps,
+                                         int extra_args,
+                                         int monitors,
+                                         int callee_params,
+                                         int callee_locals,
+                                         bool is_top_frame) {
+  // Note: This calculation must exactly parallel the frame setup
+  // in AbstractInterpreterGenerator::generate_method_entry.
+
+  // fixed size of an interpreter frame:
+  int overhead = frame::sender_sp_offset -
+                 frame::interpreter_frame_initial_sp_offset;
+  // Our locals were accounted for by the caller (or last_frame_adjust
+  // on the transition). Since the callee parameters already account
+  // for the callee's params, we only need to account for the extra
+  // locals.
+ int size = overhead + + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + extra_args; + + return size; +} + + +const int Interpreter::return_sentinel = 0xfeedbeed; +const int method_offset = frame::interpreter_frame_method_offset * wordSize; +const int bci_offset = frame::interpreter_frame_bcx_offset * wordSize; +const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + +#ifdef ASSERT + { + Label L; + __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ subu(T1, T1, SP); // T1 = maximal sp for current fp + __ bgez(T1, L); // check if frame is complete + __ delayed()->nop(); + __ stop("interpreter frame not set up"); + __ bind(L); + } +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted + // FIXME: please change the func restore_bcp + // S0 is the conventional register for bcp + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception + // FIXME: why do not pass parameter thread ? + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( + const char* name) { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + __ li(A1, (long)name); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + // setup parameters + __ li(A1, (long)name); + if (pass_oop) { + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); + } else { + __ li(A2, (long)message); + __ call_VM(V0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); + } + // throw exception + __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); + __ delayed()->nop(); + return entry; +} + + +address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { + address entry = __ pc(); + // NULL last_sp until next java call + __ sd(R0,Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + __ dispatch_next(state); + return entry; +} + + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + + address entry = __ pc(); + + // Restore stack bottom in case i2c 
adjusted stack + __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + // and NULL it as marker that sp is now tos until next java call + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + __ restore_bcp(); + __ restore_locals(); + + // mdp: T8 + // ret: FSR + // tmp: T9 + if (state == atos) { + Register mdp = T8; + Register tmp = T9; + __ profile_return_type(mdp, FSR, tmp); + } + + + const Register cache = T9; + const Register index = T3; + __ get_cache_and_index_at_bcp(cache, index, 1, index_size); + + const Register flags = cache; + __ dsll(AT, index, Address::times_ptr); + __ daddu(AT, cache, AT); + __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); + __ dsll(AT, flags, Interpreter::stackElementScale()); + __ daddu(SP, SP, AT); + + __ dispatch_next(state, step); + + return entry; +} + + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, + int step) { + address entry = __ pc(); + // NULL last_sp until next java call + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ restore_bcp(); + __ restore_locals(); + // handle exceptions + { + Label L; + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + __ dispatch_next(state, step); + return entry; +} + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : // fall through + case T_LONG : // fall through + case T_VOID : i = 4; break; + case T_FLOAT : i = 5; break; + case T_DOUBLE : i = 6; break; + case T_OBJECT : // fall through + case T_ARRAY : i = 7; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + + +address TemplateInterpreterGenerator::generate_result_handler_for( + BasicType type) { + address entry = __ pc(); + switch (type) { + case T_BOOLEAN: __ c2bool(V0); break; + case T_CHAR : __ andi(V0, V0, 0xFFFF); break; + case T_BYTE : __ sign_extend_byte (V0); break; + case T_SHORT : __ sign_extend_short(V0); break; + case T_INT : /* nothing to do */ break; + case T_FLOAT : /* nothing to do */ break; + case T_DOUBLE : /* nothing to do */ break; + case T_OBJECT : + { + __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ verify_oop(V0); // and verify it + } + break; + default : ShouldNotReachHere(); + } + __ jr(RA); // return from result handler + __ delayed()->nop(); + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for( + TosState state, + address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} + + + +// Helpers for commoning out cases in the various type of method entries. 
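The counter code that follows implements a "sticky" overflow test: the invocation counter is bumped, the (masked) backedge counter is added in, and the sum is compared against the interpreter invocation limit. A plain C++ rendering of the non-tiered path, with illustrative stand-ins for the InvocationCounter constants (not the generated code):

  // Sketch of the non-tiered overflow test in generate_counter_incr() below.
  // The shift/mask/limit values are illustrative; the real ones come from
  // InvocationCounter and CompileThreshold.
  static bool invocation_counter_overflow_sketch(unsigned int* invocation_counter,
                                                 unsigned int  backedge_counter) {
    const unsigned int count_shift     = 3;                        // status bits live below the count
    const unsigned int count_increment = 1u << count_shift;
    const unsigned int count_mask      = ~(count_increment - 1);   // mask out the status bits
    const unsigned int limit           = 10000u << count_shift;    // ~ CompileThreshold, scaled

    *invocation_counter += count_increment;                        // bump the invocation count
    unsigned int sum = *invocation_counter + (backedge_counter & count_mask);
    return sum >= limit;                                           // overflow -> request compilation
  }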
+// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// Rmethod: method +// T3 : invocation counter +// +void InterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { + Label done; + if (TieredCompilation) { + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + Label no_mdo; + if (ProfileInterpreter) { + // Are we profiling? + __ ld(FSR, Address(Rmethod, Method::method_data_offset())); + __ beq(FSR, R0, no_mdo); + __ delayed()->nop(); + // Increment counter in the MDO + const Address mdo_invocation_counter(FSR, in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); + __ beq(R0, R0, done); + __ delayed()->nop(); + } + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(FSR, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + __ get_method_counters(Rmethod, FSR, done); + __ increment_mask_and_jump(invocation_counter, increment, mask, T3, false, Assembler::zero, overflow); + __ bind(done); + } else { + const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + + __ get_method_counters(Rmethod, FSR, done); + + if (ProfileInterpreter) { // %%% Merge this into methodDataOop + __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + __ incrementl(T9, 1); + __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + } + // Update standard invocation counters + __ lw(T3, invocation_counter); + __ increment(T3, InvocationCounter::count_increment); + __ sw(T3, invocation_counter); // save invocation count + + __ lw(FSR, backedge_counter); // load backedge counter + __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits + __ andr(FSR, FSR, AT); + + __ daddu(T3, T3, FSR); // add both counters + + if (ProfileInterpreter && profile_method != NULL) { + // Test to see if we should create a method data oop + if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { + __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ lw(AT, AT, 0); + __ slt(AT, T3, AT); + } + + __ bne_far(AT, R0, *profile_method_continue); + __ delayed()->nop(); + + // if no method data exists, go to profile_method + __ test_method_data_pointer(FSR, *profile_method); + } + + if (Assembler::is_simm16(CompileThreshold)) { + __ srl(AT, T3, InvocationCounter::count_shift); + __ slti(AT, AT, CompileThreshold); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); + __ lw(AT, AT, 0); + __ slt(AT, T3, AT); + } + + __ beq_far(AT, R0, *overflow); + __ delayed()->nop(); + __ bind(done); + } +} + +void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { + + // Asm interpreter on entry + // S7 - locals + // S0 - bcp + // Rmethod - method + // FP - interpreter frame + + // On return (i.e. 
jump to entry_point) + // Rmethod - method + // RA - return address of interpreter caller + // tos - the last parameter to Java method + // SP - sender_sp + + + // the bcp is valid if and only if it's not null + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), R0); + __ ld(Rmethod, FP, method_offset); + // Preserve invariant that S0/S7 contain bcp/locals of sender frame + __ b_far(*do_continue); + __ delayed()->nop(); +} + +// See if we've got enough room on the stack for locals plus overhead. +// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. +// +// NOTE: Since the additional locals are also always pushed (wasn't +// obvious in generate_method_entry) so the guard should work for them +// too. +// +// Args: +// T2: number of additional locals this frame needs (what we must check) +// T0: Method* +// +void InterpreterGenerator::generate_stack_overflow_check(void) { + // see if we've got enough room on the stack for locals plus overhead. + // the expression stack grows down incrementally, so the normal guard + // page mechanism will work for that. + // + // Registers live on entry: + // + // T0: Method* + // T2: number of additional locals this frame needs (what we must check) + + // NOTE: since the additional locals are also always pushed (wasn't obvious in + // generate_method_entry) so the guard should work for them too. + // + + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved fp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) + + entry_size; + + const int page_size = os::vm_page_size(); + + Label after_frame_check; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. 
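In C terms, the check emitted just below is roughly the following: frames that add no more than about one page of locals are already covered by the guard pages, while larger frames must verify that the prospective stack pointer stays above the guard zone. A sketch with illustrative parameter names (not the generated code):

  #include <stddef.h>
  #include <stdint.h>

  // Shape of the stack-overflow check below; all sizes in bytes (illustrative).
  static bool interpreter_frame_fits_sketch(size_t extra_locals_bytes, size_t overhead_bytes,
                                            size_t page_size, size_t guard_zone_bytes,
                                            uintptr_t sp, uintptr_t stack_base, size_t stack_size) {
    if (extra_locals_bytes <= page_size - overhead_bytes) {
      return true;  // small frame: the guard-page mechanism already covers it
    }
    // The lowest address the new frame may reach must stay above the usable
    // stack bottom plus the red/yellow guard zone.
    uintptr_t limit = stack_base - stack_size + guard_zone_bytes
                    + extra_locals_bytes + overhead_bytes;
    return sp > limit;
  }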
+ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); + __ slt(AT, AT, T2); + __ beq(AT, R0, after_frame_check); + __ delayed()->nop(); + + // compute sp as if this were going to be the last frame on + // the stack before the red zone +#ifndef OPT_THREAD + Register thread = T1; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + + // locals + overhead, in bytes + __ dsll(T3, T2, Interpreter::stackElementScale()); + __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 + +#ifdef ASSERT + Label stack_base_okay, stack_size_okay; + // verify that thread stack base is non-zero + __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); + __ bne(AT, R0, stack_base_okay); + __ delayed()->nop(); + __ stop("stack base is zero"); + __ bind(stack_base_okay); + // verify that thread stack size is non-zero + __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); + __ bne(AT, R0, stack_size_okay); + __ delayed()->nop(); + __ stop("stack size is zero"); + __ bind(stack_size_okay); +#endif + + // Add stack base to locals and subtract stack size + __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT + __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 + __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT + __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 + + + // add in the redzone and yellow size + __ move(AT, (StackRedPages+StackYellowPages) * page_size); + __ addu(T3, T3, AT); + + // check against the current stack bottom + __ slt(AT, T3, SP); + __ bne(AT, R0, after_frame_check); + __ delayed()->nop(); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. 
+ __ move(SP, Rsender); + assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); + __ delayed()->nop(); + + // all done with frame size check + __ bind(after_frame_check); +} + +// Allocate monitor and lock method (asm interpreter) +// Rmethod - Method* +void InterpreterGenerator::lock_method(void) { + // synchronize method + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + +#ifdef ASSERT + { Label L; + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); + __ bne(T0, R0, L); + __ delayed()->nop(); + __ stop("method doesn't need synchronization"); + __ bind(L); + } +#endif // ASSERT + // get synchronization object + { + Label done; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, T0, JVM_ACC_STATIC); + __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); + __ beq(T2, R0, done); + __ delayed()->nop(); + __ ld(T0, Rmethod, in_bytes(Method::const_offset())); + __ ld(T0, T0, in_bytes(ConstMethod::constants_offset())); + __ ld(T0, T0, ConstantPool::pool_holder_offset_in_bytes()); + __ ld(T0, T0, mirror_offset); + __ bind(done); + } + // add space for monitor & lock + __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry + __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + // set new monitor block top + __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object + // FIXME: I do not know what lock_object will do and what it will need + __ move(c_rarg0, SP); // object address + __ lock_object(c_rarg0); +} + +// Generate a fixed interpreter frame. This is identical setup for +// interpreted methods and for native methods hence the shared code. +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- T0(sender's sp) + // ... 
+ // [ argument word 0 ] <--- S7 + + // initialize fixed part of activation frame + // sender's sp in Rsender + int i = 0; + int frame_size = 9; +#ifndef CORE + ++frame_size; +#endif + __ daddiu(SP, SP, (-frame_size) * wordSize); + __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address + __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp + __ daddiu(FP, SP, (frame_size - 2) * wordSize); + __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp + __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null + __ sd(LVP, FP, (-++i) * wordSize); // save locals offset + __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop + __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase + __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* +#ifndef CORE + if (ProfileInterpreter) { + Label method_data_continue; + __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); + __ beq(AT, R0, method_data_continue); + __ delayed()->nop(); + __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); + __ bind(method_data_continue); + __ sd(AT, FP, (-++i) * wordSize); + } else { + __ sd(R0, FP, (-++i) * wordSize); + } +#endif // !CORE + + __ ld(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); + __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache + if (native_call) { + __ sd(R0, FP, (-++i) * wordSize); // no bcp + } else { + __ sd(BCP, FP, (-++i) * wordSize); // set bcp + } + __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom + assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); +} + +// End of helpers + +// Various method entries +//------------------------------------------------------------------------------------------------------------------------ +// +// + +// Call an accessor method (assuming it is resolved, otherwise drop +// into vanilla (slow path) entry +address InterpreterGenerator::generate_accessor_entry(void) { + + // Rmethod: Method* + // V0: receiver (preserve for slow entry into asm interpreter) + // Rsender: senderSP must preserved for slow path, set SP to it on fast path + + address entry_point = __ pc(); + Label xreturn_path; + // do fastpath for resolved accessor methods + if (UseFastAccessorMethods) { + Label slow_path; + __ li(T2, SafepointSynchronize::address_of_state()); + __ lw(AT, T2, 0); + __ daddiu(AT, AT, -(SafepointSynchronize::_not_synchronized)); + __ bne(AT, R0, slow_path); + __ delayed()->nop(); + // Code: _aload_0, _(i|a)getfield, _(i|a)return or any rewrites thereof; + // parameter size = 1 + // Note: We can only use this code if the getfield has been resolved + // and if we don't have a null-pointer exception => check for + // these conditions first and use slow path if necessary. + // Rmethod: method + // V0: receiver + + // [ receiver ] <-- sp + __ ld(T0, SP, 0); + + // check if local 0 != NULL and read field + __ beq(T0, R0, slow_path); + __ delayed()->nop(); + __ ld(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); + // read first instruction word and extract bytecode @ 1 and index @ 2 + __ ld(T3, Rmethod, in_bytes(Method::const_offset())); + __ lw(T3, T3, in_bytes(ConstMethod::codes_offset())); + // Shift codes right to get the index on the right. 
+ // The bytecode fetched looks like <0xb4><0x2a> + __ dsrl(T3, T3, 2 * BitsPerByte); + // FIXME: maybe it's wrong + __ dsll(T3, T3, exact_log2(in_words(ConstantPoolCacheEntry::size()))); + __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); + + // T0: local 0 + // Rmethod: method + // V0: receiver - do not destroy since it is needed for slow path! + // T1: scratch use which register instead ? + // T3: constant pool cache index + // T2: constant pool cache + // Rsender: send's sp + // check if getfield has been resolved and read constant pool cache entry + // check the validity of the cache entry by testing whether _indices field + // contains Bytecode::_getfield in b1 byte. + assert(in_words(ConstantPoolCacheEntry::size()) == 4, "adjust shift below"); + + __ dsll(T8, T3, Address::times_8); + __ move(T1, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::indices_offset())); + __ daddu(T1, T8, T1); + __ daddu(T1, T1, T2); + __ lw(T1, T1, 0); + __ dsrl(T1, T1, 2 * BitsPerByte); + __ andi(T1, T1, 0xFF); + __ daddiu(T1, T1, (-1) * Bytecodes::_getfield); + __ bne(T1, R0, slow_path); + __ delayed()->nop(); + + // Note: constant pool entry is not valid before bytecode is resolved + + __ move(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + __ daddu(T1, T1, T8); + __ daddu(T1, T1, T2); + __ lw(AT, T1, 0); + + __ move(T1, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ daddu(T1, T1, T8); + __ daddu(T1, T1, T2); + __ lw(T3, T1, 0); + + Label notByte, notBool, notShort, notChar, notObj; + + // Need to differentiate between igetfield, agetfield, bgetfield etc. + // because they are different sizes. + // Use the type from the constant pool cache + __ srl(T3, T3, ConstantPoolCacheEntry::tos_state_shift); + // Make sure we don't need to mask T3 for tosBits after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + // btos = 0 + __ bne(T3, R0, notByte); + __ delayed()->daddu(T0, T0, AT); + + __ lb(V0, T0, 0); + __ b(xreturn_path); + __ delayed()->nop(); + + //ztos + __ bind(notByte); + __ daddiu(T1, T3, (-1) * ztos); + __ bne(T1, R0, notBool); + __ delayed()->nop(); + __ lb(V0, T0, 0); + __ b(xreturn_path); + __ delayed()->nop(); + + //stos + __ bind(notBool); + __ daddiu(T1, T3, (-1) * stos); + __ bne(T1, R0, notShort); + __ delayed()->nop(); + __ lh(V0, T0, 0); + __ b(xreturn_path); + __ delayed()->nop(); + + //ctos + __ bind(notShort); + __ daddiu(T1, T3, (-1) * ctos); + __ bne(T1, R0, notChar); + __ delayed()->nop(); + __ lhu(V0, T0, 0); + __ b(xreturn_path); + __ delayed()->nop(); + + //atos + __ bind(notChar); + __ daddiu(T1, T3, (-1) * atos); + __ bne(T1, R0, notObj); + __ delayed()->nop(); + //add for compressedoops + __ load_heap_oop(V0, Address(T0, 0)); + __ b(xreturn_path); + __ delayed()->nop(); + + //itos + __ bind(notObj); +#ifdef ASSERT + Label okay; + __ daddiu(T1, T3, (-1) * itos); + __ beq(T1, R0, okay); + __ delayed()->nop(); + __ stop("what type is this?"); + __ bind(okay); +#endif // ASSERT + __ lw(V0, T0, 0); + + __ bind(xreturn_path); + + // _ireturn/_areturn + //FIXME + __ move(SP, Rsender);//FIXME, set sender's fp to SP + __ jr(RA); + __ delayed()->nop(); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + (void) generate_normal_entry(false); + } else { + (void) generate_normal_entry(false); + } + + return entry_point; +} + +// Method entry for java.lang.ref.Reference.get. 
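+// (In short: when G1 is in use the referent load must be followed by an SATB
+//  pre-barrier so the collector still sees the referent; when G1 is not in
+//  use this entry simply reuses the plain accessor entry above.)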
+address InterpreterGenerator::generate_Reference_get_entry(void) { +#if INCLUDE_ALL_GCS + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes:- + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. + // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. + // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_enty. + // + // Rmethod: Method* + + // Rsender: senderSP must preserve for slow path, set SP to it on fast path (Rsender) + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + if (UseG1GC) { + Label slow_path; + + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ld(V0, SP, 0); + + __ beq(V0, R0, slow_path); + __ delayed()->nop(); + + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + + // Load the value of the referent field. + const Address field_address(V0, referent_offset); + __ load_heap_oop(V0, field_address); + + __ push(RA); + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + __ g1_write_barrier_pre(noreg /* obj */, + V0 /* pre_val */, + TREG /* thread */, + Rmethod /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ pop(RA); + + __ jr(RA); + __ delayed()->daddu(SP, Rsender, R0); // set sp to sender sp + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + (void) generate_normal_entry(false); + + return entry; + } +#endif // INCLUDE_ALL_GCS + + // If G1 is not enabled then attempt to go through the accessor entry point + // Reference.get is an accessor + return generate_accessor_entry(); +} + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. 
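+//
+// Roughly, the code below:
+//   1. reads the parameter count and locates the parameters (LVP),
+//   2. pushes two extra slots (oop temp / result handler) plus the fixed frame,
+//   3. runs the signature handler to shuffle the arguments into place,
+//   4. passes JNIEnv (and a mirror handle for static methods),
+//   5. switches the thread to _thread_in_native and calls the native function,
+//   6. switches back through _thread_in_native_trans, honoring safepoints,
+//   7. unboxes an oop result, unlocks if synchronized, notifies JVMTI and returns.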
+address InterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls; + // Rsender: sender's sp + // Rmethod: Method* + address entry_point = __ pc(); + +#ifndef CORE + const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset())); +#endif + + // get parameter size (always needed) + // the size in the java stack + __ ld(V0, Rmethod, in_bytes(Method::const_offset())); + __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); + + // native calls don't need the stack size check since they have no expression stack + // and the arguments are already on the stack and we only add a handful of words + // to the stack + + // Rmethod: Method* + // V0: size of parameters + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + + // for natives the size of locals is zero + + // compute beginning of parameters (S7) + __ dsll(LVP, V0, Address::times_8); + __ daddiu(LVP, LVP, (-1) * wordSize); + __ daddu(LVP, LVP, SP); + + + // add 2 zero-initialized slots for native calls + // 1 slot for native oop temp offset (setup via runtime) + // 1 slot for static native result handler3 (setup via runtime) + __ push2(R0, R0); + + // Layout of frame at this point + // [ method holder mirror ] <--- sp + // [ result type info ] + // [ argument word n-1 ] <--- T0 + // ... + // [ argument word 0 ] <--- LVP + + +#ifndef CORE + if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count +#endif + + // initialize fixed part of activation frame + generate_fixed_frame(true); + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- sender's sp + // ... + // [ argument word 0 ] <--- S7 + + + // make sure method is native & not abstract +#ifdef ASSERT + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(AT, T0, JVM_ACC_NATIVE); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(AT, T0, JVM_ACC_ABSTRACT); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. 
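+  // (The flag is a single byte: it is set to true just below and cleared
+  //  again right after the shadow pages have been banged, at which point
+  //  remove_activation may unlock normally.)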
+ Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(AT, (int)true); + __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; + __ bind(continue_after_compile); +#endif // CORE + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // after method_lock, the layout of frame is as following + // + // [ monitor entry ] <--- sp + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // start execution +#ifdef ASSERT + { + Label L; + __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ delayed()->nop(); + __ stop("broken stack frame setup in interpreter in asm"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + // work registers + const Register method = Rmethod; + //const Register thread = T2; + const Register t = T8; + + __ get_method(method); + __ verify_oop(method); + { + Label L, Lstatic; + __ ld(t,method,in_bytes(Method::const_offset())); + __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); + // MIPS n64 ABI: caller does not reserve space for the register auguments. + // A0 and A1(if needed) + __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, AT, JVM_ACC_STATIC); + __ beq(AT, R0, Lstatic); + __ delayed()->nop(); + __ daddiu(t, t, 1); + __ bind(Lstatic); + __ daddiu(t, t, -7); + __ blez(t, L); + __ delayed()->nop(); + __ dsll(t, t, Address::times_8); + __ dsubu(SP, SP, t); + __ bind(L); + } + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP, SP, AT); + __ move(AT, SP); + // [ ] <--- sp + // ... (size of parameters - 8 ) + // [ monitor entry ] + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer (0) ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ method holder mirror ] + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... 
+ // [ argument word 0 ] <--- LVP + + // get signature handler + { + Label L; + __ ld(T9, method, in_bytes(Method::signature_handler_offset())); + __ bne(T9, R0, L); + __ delayed()->nop(); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ ld(T9, method, in_bytes(Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + // FIXME: when change codes in InterpreterRuntime, note this point + // from: begin of parameters + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); + // to: current sp + assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); + // temp: T3 + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); + + __ jalr(T9); + __ delayed()->nop(); + __ get_method(method); + + // + // if native function is static, and its second parameter has type length of double word, + // and first parameter has type length of word, we have to reserve one word + // for the first parameter, according to mips o32 abi. + // if native function is not static, and its third parameter has type length of double word, + // and second parameter has type length of word, we have to reserve one word for the second + // parameter. + // + + + // result handler is in V0 + // set result handler + __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); + +#define FIRSTPARA_SHIFT_COUNT 5 +#define SECONDPARA_SHIFT_COUNT 9 +#define THIRDPARA_SHIFT_COUNT 13 +#define PARA_MASK 0xf + + // pass mirror handle if static call + { + Label L; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ lw(t, method, in_bytes(Method::access_flags_offset())); + __ andi(AT, t, JVM_ACC_STATIC); + __ beq(AT, R0, L); + __ delayed()->nop(); + + // get mirror + __ ld(t, method, in_bytes(Method:: const_offset())); + __ ld(t, t, in_bytes(ConstMethod::constants_offset())); //?? + __ ld(t, t, ConstantPool::pool_holder_offset_in_bytes()); + __ ld(t, t, mirror_offset); + // copy mirror into activation frame + //__ sw(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + // pass handle to mirror + __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); + __ move(A1, t); + __ bind(L); + } + + // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) + // [ ] | + // ... size of parameters(or +1) | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... 
+ // [ argument word 0 ] <--- S7 + + // get native function entry point + { Label L; + __ ld(T9, method, in_bytes(Method::native_function_offset())); + __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ bne(V1, T9, L); + __ delayed()->nop(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); + __ get_method(method); + __ verify_oop(method); + __ ld(T9, method, in_bytes(Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + // native function in T9 +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset())); + __ move(A0, t); + // [ jni environment ] <--- sp + // [ mthd holder mirror ptr ] ---------------------------->| (only for static method) + // [ ] | + // ... size of parameters | + // [ monitor entry ] | + // ... | + // [ monitor entry ] | + // [ monitor block top ] ( the top monitor entry ) | + // [ byte code pointer (0) ] (if native, bcp = 0) | + // [ constant pool cache ] | + // [ Method* ] | + // [ locals offset ] | + // [ sender's sp ] | + // [ sender's fp ] | + // [ return address ] <--- fp | + // [ method holder mirror ] <----------------------------| + // [ result type info ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- S7 + + // set_last_Java_frame_before_call + __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset())); + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + __ li(t, __ pc()); + __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset())); + __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); + + // change thread state +#ifdef ASSERT + { + Label L; + __ lw(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ daddiu(t, t, (-1) * _thread_in_Java); + __ beq(t, R0, L); + __ delayed()->nop(); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif + + __ move(t, _thread_in_native); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); + + // call native method + __ jalr(T9); + __ delayed()->nop(); + // result potentially in V0 or F0 + + + // via _last_native_pc and not via _last_jave_sp + // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. + // If the order changes or anything else is added to the stack the code in + // interpreter_frame_result will have to be changed. + //FIXME, should modify here + // save return value to keep the value from being destroyed by other calls + __ push(dtos); + __ push(ltos); + + // change thread state + __ get_thread(thread); + __ move(t, _thread_in_native_trans); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ sync(); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. 
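+      // (serialize_memory writes to a thread-specific slot of the VM's shared
+      //  serialization page; the VM thread can later protect that page to force
+      //  every mutator thread through a serializing memory fault.)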
+ __ serialize_memory(thread, A0); + } + } + + // check for safepoint operation in progress and/or pending suspend requests + { Label Continue; + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are + // preserved and correspond to the bcp/locals pointers. So we do a runtime call + // by hand. + // + Label L; + __ li(AT, SafepointSynchronize::address_of_state()); + __ lw(AT, AT, 0); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); + __ beq(AT, R0, Continue); + __ delayed()->nop(); + __ bind(L); + __ move(A0, thread); + __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), + relocInfo::runtime_call_type); + __ delayed()->nop(); + +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + //add for compressedoops + __ reinit_heapbase(); + __ bind(Continue); + } + + // change thread state + __ move(t, _thread_in_Java); + if(os::is_MP()) { + __ sync(); // store release + } + __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); + __ reset_last_Java_frame(thread, true); + + // reset handle block + __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); + __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); + + // If result was an oop then unbox and save it in the frame + { + Label no_oop; + //FIXME, addiu only support 16-bit imeditate + __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); + __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); + __ bne(AT, T0, no_oop); + __ delayed()->nop(); + __ pop(ltos); + // Unbox oop result, e.g. JNIHandles::resolve value. + __ resolve_jobject(V0, thread, T9); + __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); + // keep stack depth as expected by pushing oop which will eventually be discarded + __ push(ltos); + __ bind(no_oop); + } + { + Label no_reguard; + __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); + __ move(AT,(int) JavaThread::stack_guard_yellow_disabled); + __ bne(t, AT, no_reguard); + __ delayed()->nop(); + __ pushad(); + __ move(S5_heapbase, SP); + __ move(AT, -StackAlignmentInBytes); + __ andr(SP, SP, AT); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); + __ delayed()->nop(); + __ move(SP, S5_heapbase); + __ popad(); + //add for compressedoops + __ reinit_heapbase(); + __ bind(no_reguard); + } + // restore BCP to have legal interpreter frame, + // i.e., bci == 0 <=> BCP == code_base() + // Can't call_VM until bcp is within reasonable. + __ get_method(method); // method is junk from thread_in_native to now. + __ verify_oop(method); + __ ld(BCP, method, in_bytes(Method::const_offset())); + __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); + __ beq(t, R0, L); + __ delayed()->nop(); + // Note: At some point we may want to unify this with the code used in + // call_VM_base(); + // i.e., we should use the StubRoutines::forward_exception code. For now this + // doesn't work here because the sp is not correctly set at this point. 
+ __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // do unlocking if necessary + { + Label L; + __ lw(t, method, in_bytes(Method::access_flags_offset())); + __ andi(t, t, JVM_ACC_SYNCHRONIZED); + __ beq(t, R0, L); + // the code below should be shared with interpreter macro assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, + // since this is a synchronized method. However, need + // to check that the object has not been unlocked by + // an explicit monitorexit bytecode. + __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); + // address of first monitor + + __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ bne(t, R0, unlock); + __ delayed()->nop(); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(unlock); + __ unlock_object(c_rarg0); + } + __ bind(L); + } + + // jvmti/jvmpi support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). + __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + + // restore potential result in V0, + // call result handler to restore potential result in ST0 & handle result + + __ pop(ltos); + __ pop(dtos); + + __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); + __ jalr(t); + __ delayed()->nop(); + + + // remove activation + __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp + __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address + __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp + __ jr(RA); + __ delayed()->nop(); + +#ifndef CORE + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + // entry_point is the beginning of this + // function and checks again for compiled code + } +#endif + return entry_point; +} + +// +// Generic interpreted method entry to (asm) interpreter +// +// Layout of frame just at the entry +// +// [ argument word n-1 ] <--- sp +// ... +// [ argument word 0 ] +// assume Method* in Rmethod before call this method. 
+// prerequisites to the generated stub : the callee Method* in Rmethod +// note you must save the caller bcp before call the generated stub +// +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls; + + // Rmethod: Method* + // Rsender: sender 's sp + address entry_point = __ pc(); + + const Address invocation_counter(Rmethod, + in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset())); + + // get parameter size (always needed) + __ ld(T3, Rmethod, in_bytes(Method::const_offset())); //T3 --> Rmethod._constMethod + __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset())); + + // Rmethod: Method* + // V0: size of parameters + // Rsender: sender 's sp ,could be different frome sp+ wordSize if we call via c2i + // get size of locals in words to T2 + __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset())); + // T2 = no. of additional locals, locals include parameters + __ dsubu(T2, T2, V0); + + // see if we've got enough room on the stack for locals plus overhead. + // Layout of frame at this point + // + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] + generate_stack_overflow_check(); + // after this function, the layout of frame does not change + + // compute beginning of parameters (LVP) + __ dsll(LVP, V0, LogBytesPerWord); + __ daddiu(LVP, LVP, (-1) * wordSize); + __ daddu(LVP, LVP, SP); + + // T2 - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ beq(T2, R0, exit); + __ delayed()->nop(); + + __ bind(loop); + __ daddiu(SP, SP, (-1) * wordSize); + __ daddiu(T2, T2, -1); // until everything initialized + __ bne(T2, R0, loop); + __ delayed()->sd(R0, SP, 0); // initialize local variables + + __ bind(exit); + } + + // + // [ local var m-1 ] <--- sp + // ... + // [ local var 0 ] + // [ argument word n-1 ] <--- T0? + // ... + // [ argument word 0 ] <--- LVP + + // initialize fixed part of activation frame + + generate_fixed_frame(false); + + + // after this function, the layout of frame is as following + // + // [ monitor block top ] <--- sp ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] <--- fp + // [ return address ] + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + + // make sure method is not native & not abstract +#ifdef ASSERT + __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); + { + Label L; + __ andi(T2, AT, JVM_ACC_NATIVE); + __ beq(T2, R0, L); + __ delayed()->nop(); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ andi(T2, AT, JVM_ACC_ABSTRACT); + __ beq(T2, R0, L); + __ delayed()->nop(); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation will + // check this flag. 
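+  // (Compared with the native entry above: the additional locals have just been
+  //  zero-initialized, a real bcp was stored into the fixed frame, and after the
+  //  counter / profiling bookkeeping below execution continues with
+  //  dispatch_next(vtos) instead of a single native call.)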
+ +#ifndef OPT_THREAD + Register thread = T8; + __ get_thread(thread); +#else + Register thread = TREG; +#endif + __ move(AT, (int)true); + __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + +#ifndef CORE + + // mdp : T8 + // tmp1: T9 + // tmp2: T2 + __ profile_parameters_type(T8, T9, T2); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + Label profile_method; + Label profile_method_continue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, + &profile_method, + &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + + Label continue_after_compile; + __ bind(continue_after_compile); + +#endif // CORE + + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + // + if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { Label L; + __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); + __ beq(T2, R0, L); + __ delayed()->nop(); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // layout of frame after lock_method + // [ monitor entry ] <--- sp + // ... + // [ monitor entry ] + // [ monitor block top ] ( the top monitor entry ) + // [ byte code pointer ] (if native, bcp = 0) + // [ constant pool cache ] + // [ Method* ] + // [ locals offset ] + // [ sender's sp ] + // [ sender's fp ] + // [ return address ] <--- fp + // [ local var m-1 ] + // ... + // [ local var 0 ] + // [ argumnet word n-1 ] <--- ( sender's sp ) + // ... + // [ argument word 0 ] <--- LVP + + + // start execution +#ifdef ASSERT + { + Label L; + __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ beq(AT, SP, L); + __ delayed()->nop(); + __ stop("broken stack frame setup in interpreter in native"); + __ bind(L); + } +#endif + + // jvmti/jvmpi support + __ notify_method_entry(); + + __ dispatch_next(vtos); + + // invocation counter overflow + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter + __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ get_method(Rmethod); + __ b(profile_method_continue); + __ delayed()->nop(); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + } + + return entry_point; +} + +// Entry points +// +// Here we generate the various kind of entries into the interpreter. +// The two main entry type are generic bytecode methods and native +// call method. These both come in synchronized and non-synchronized +// versions but the frame layout they create is very similar. The +// other method entry types are really just special purpose entries +// that are really entry and interpretation all in one. These are for +// trivial methods like accessor, empty, or special math methods. 
+// +// When control flow reaches any of the entry types for the interpreter +// the following holds -> +// +// Arguments: +// +// Rmethod: Method* +// V0: receiver +// +// +// Stack layout immediately at entry +// +// [ parameter n-1 ] <--- sp +// ... +// [ parameter 0 ] +// [ expression stack ] (caller's java expression stack) + +// Assuming that we don't go to one of the trivial specialized entries +// the stack will look like below when we are ready to execute the +// first bytecode (or call the native routine). The register usage +// will be as the template based interpreter expects (see +// interpreter_mips_64.hpp). +// +// local variables follow incoming parameters immediately; i.e. +// the return address is moved to the end of the locals). +// +// [ monitor entry ] <--- sp +// ... +// [ monitor entry ] +// [ monitor block top ] ( the top monitor entry ) +// [ byte code pointer ] (if native, bcp = 0) +// [ constant pool cache ] +// [ Method* ] +// [ locals offset ] +// [ sender's sp ] +// [ sender's fp ] +// [ return address ] <--- fp +// [ local var m-1 ] +// ... +// [ local var 0 ] +// [ argumnet word n-1 ] <--- ( sender's sp ) +// ... +// [ argument word 0 ] <--- S7 + +address AbstractInterpreterGenerator::generate_method_entry( + AbstractInterpreter::MethodKind kind) { + // determine code generation flags + bool synchronized = false; + address entry_point = NULL; + switch (kind) { + case Interpreter::zerolocals : + break; + case Interpreter::zerolocals_synchronized: + synchronized = true; + break; + case Interpreter::native : + entry_point = ((InterpreterGenerator*)this)->generate_native_entry(false); + break; + case Interpreter::native_synchronized : + entry_point = ((InterpreterGenerator*)this)->generate_native_entry(true); + break; + case Interpreter::empty : + entry_point = ((InterpreterGenerator*)this)->generate_empty_entry(); + break; + case Interpreter::accessor : + entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); + break; + case Interpreter::abstract : + entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); + break; + + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : break; + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_sqrt : + entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind); break; + case Interpreter::java_lang_ref_reference_get: + entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break; + default: + fatal(err_msg("unexpected method kind: %d", kind)); + break; + } + if (entry_point) return entry_point; + + return ((InterpreterGenerator*)this)->generate_normal_entry(synchronized); +} + +// These should never be compiled since the interpreter will prefer +// the compiled version to the intrinsic version. 
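+// (That is, can_be_compiled() answers false exactly for the math kinds that
+//  generate_method_entry above either routes to generate_math_entry or leaves
+//  on the normal entry path, so they are never queued for compilation.)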
+bool AbstractInterpreter::can_be_compiled(methodHandle m) { + switch (method_kind(m)) { + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : + return false; + default: + return true; + } +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved fp thru expr stack bottom). + // be sure to change this if you add/subtract anything to/from the overhead area + const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; + + const int stub_code = 6; // see generate_call_stub + // return overhead_size + method->max_locals() + method->max_stack() + stub_code; + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return overhead_size + method_stack + stub_code; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // Note: This calculation must exactly parallel the frame setup + // in AbstractInterpreterGenerator::generate_method_entry. + // If interpreter_frame!=NULL, set up the method, locals, and monitors. + // The frame interpreter_frame, if not NULL, is guaranteed to be the + // right size, as determined by a previous call to this method. + // It is also guaranteed to be walkable even though it is in a skeletal state + + // fixed size of an interpreter frame: + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; + +#ifdef ASSERT + if (!EnableInvokeDynamic) { + // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences? + // Probably, since deoptimization doesn't work yet. 
+ assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable"); + } + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference in using sender_sp and interpreter_frame_sender_sp + // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) + // and sender_sp is fp+8 + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); + + //set last sp; + intptr_t* sp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(sp); + // All frames but the initial interpreter frame we fill in have a + // value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here. + // + if (extra_locals != 0 && + interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. + __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); + + // V0: exception + // V1: return address/pc that threw exception + __ restore_bcp(); // BCP points to call/send + __ restore_locals(); + + //add for compressedoops + __ reinit_heapbase(); + // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + // expression stack is undefined here + // V0: exception + // BCP: exception bcp + __ verify_oop(V0); + + // expression stack must be empty before entering the VM in case of an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ move(A1, V0); + __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); + // V0: exception handler entry point + // V1: preserved exception oop + // S0: bcp for exception handler + __ push(V1); // push exception which is now the only value on the stack + __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) + __ delayed()->nop(); + + // If the exception is not handled in the current frame the frame is removed and + // the exception is rethrown (i.e. exception continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction which caused + // the exception and the expression stack is empty. Thus, for any VM calls + // at this point, GC will find a legal oop map (with empty expression stack). 
+ + // In current activation + // V0: exception + // BCP: exception bcp + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition indicating that we are + // currently handling popframe, so that call_VMs that may happen later do not trigger new + // popframe handling cycles. +#ifndef OPT_THREAD + Register thread = T2; + __ get_thread(T2); +#else + Register thread = TREG; +#endif + __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + __ ori(T3, T3, JavaThread::popframe_processing_bit); + __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#ifndef CORE + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. + Label caller_not_deoptimized; + __ ld(A0, FP, frame::return_addr_offset * wordSize); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); + __ bne(V0, R0, caller_not_deoptimized); + __ delayed()->nop(); + + // Compute size of arguments for saving when returning to deoptimized caller + __ get_method(A1); + __ verify_oop(A1); + __ ld(A1, A1, in_bytes(Method::const_offset())); + __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); + __ shl(A1, Interpreter::logStackElementSize); + __ restore_locals(); + __ dsubu(A2, LVP, A1); + __ daddiu(A2, A2, wordSize); + // Save these arguments +#ifndef OPT_THREAD + __ get_thread(A0); +#else + __ move(A0, TREG); +#endif + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); + + __ remove_activation(vtos, T9, false, false, false); + + // Inform deoptimization that it is responsible for restoring these arguments +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); + __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + // Continue in deoptimization handler + __ jr(T9); + __ delayed()->nop(); + + __ bind(caller_not_deoptimized); + } +#endif /* !CORE */ + + __ remove_activation(vtos, T3, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Clear the popframe condition flag + // Finish with popframe handling + // A previous I2C followed by a deoptimization might have moved the + // outgoing arguments further up the stack. PopFrame expects the + // mutations to those outgoing arguments to be preserved and other + // constraints basically require this frame to look exactly as + // though it had previously invoked an interpreted activation with + // no space between the top of the expression stack (current + // last_sp) and the top of stack. Rather than force deopt to + // maintain this kind of invariant all the time we call a small + // fixup routine to move the mutated arguments onto the top of our + // expression stack if necessary. 
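+  // (Concretely: the current SP and the frame's saved last_sp are handed to
+  //  InterpreterRuntime::popframe_move_outgoing_args below so the runtime can
+  //  move the mutated outgoing arguments back into place on our expression
+  //  stack.)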
+ __ move(T8, SP); + __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + // PC must point into interpreter here + __ set_last_Java_frame(thread, noreg, FP, __ pc()); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); + __ reset_last_Java_frame(thread, true); + // Restore the last_sp and null it out + __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + + + + __ move(AT, JavaThread::popframe_inactive); + __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + + // Finish with popframe handling + __ restore_bcp(); + __ restore_locals(); +#ifndef CORE + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } +#endif // !CORE + // Clear the popframe condition flag +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ move(AT, JavaThread::popframe_inactive); + __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); + +#if INCLUDE_JVMTI + { + Label L_done; + + __ lbu(AT, BCP, 0); + __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); + __ bne(AT, R0, L_done); + __ delayed()->nop(); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ get_method(T9); + __ ld(T8, LVP, 0); + __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); + + __ beq(T8, R0, L_done); + __ delayed()->nop(); + + __ sd(T8, SP, 0); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence + __ pop(T0); +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, T3, false, true, false); + // restore exception + __ get_vm_result(T0, thread); + __ verify_oop(T0); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects + // the following registers set up: + // + // T0: exception + // T1: return address/pc that threw exception + // SP: expression stack of caller + // FP: fp of caller + __ push2(T0, T3); // save exception and return address + __ move(A1, T3); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); + __ move(T9, V0); // save exception handler + __ pop2(V0, V1); // restore return address and exception + + // Note that an "issuing PC" is actually the next PC after the call + __ jr(T9); // jump to exception handler of caller + __ delayed()->nop(); +} + + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ empty_FPU_stack(); + __ load_earlyret_value(state); + +#ifndef OPT_THREAD + __ get_thread(TREG); +#endif + __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset())); + const Address 
cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset())); + // Clear the earlyret state + __ move(AT, JvmtiThreadState::earlyret_inactive); + __ sw(AT, cond_addr); + __ sync(); + + + __ remove_activation(state, T0, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ sync(); + __ jr(T0); + __ delayed()->nop(); + return entry; +} // end of ForceEarlyReturn support + + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop(); + dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop(); + lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop(); + aep =__ pc(); __ push(atos); __ b(L); __ delayed()->nop(); + bep = cep = sep = + iep = __ pc(); __ push(itos); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + + +//----------------------------------------------------------------------------- +// Generation of individual instructions + +// helpers for generate_and_dispatch + + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : TemplateInterpreterGenerator(code) { + generate_all(); // down here so it can be "virtual" +} + +//----------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + // prepare expression stack + __ push(state); // save tosca + + // tos & tos2 + // trace_bytecode need actually 4 args, the last two is tos&tos2 + // this work fine for x86. but mips o32 call convention will store A2-A3 + // to the stack position it think is the tos&tos2 + // when the expression stack have no more than 2 data, error occur. 
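+  // (Restated: SharedRuntime::trace_bytecode takes the two topmost expression
+  //  stack values as its last two arguments, so they are loaded explicitly into
+  //  A2/A3 below rather than relying on the calling convention to find them on
+  //  the stack.)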
+ __ ld(A2, SP, 0); + __ ld(A3, SP, 1 * wordSize); + + // pass arguments & call tracer + __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), RA, A2, A3); + __ move(RA, V0); // make sure return address is not destroyed by pop(state) + + // restore expression stack + __ pop(state); // restore tosca + + // return + __ jr(RA); + __ delayed()->nop(); + + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() { + __ li(T8, (long)&BytecodeCounter::_counter_value); + __ lw(AT, T8, 0); + __ daddiu(AT, AT, 1); + __ sw(AT, T8, 0); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { + __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); + __ lw(AT, T8, 0); + __ daddiu(AT, AT, 1); + __ sw(AT, T8, 0); +} + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { + __ li(T8, (long)&BytecodePairHistogram::_index); + __ lw(T9, T8, 0); + __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); + __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); + __ orr(T9, T9, T8); + __ li(T8, (long)&BytecodePairHistogram::_index); + __ sw(T9, T8, 0); + __ dsll(T9, T9, 2); + __ li(T8, (long)BytecodePairHistogram::_counters); + __ daddu(T8, T8, T9); + __ lw(AT, T8, 0); + __ daddiu(AT, AT, 1); + __ sw(AT, T8, 0); +} + + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. + + address entry = Interpreter::trace_code(t->tos_in()); + assert(entry != NULL, "entry must have been generated"); + __ call(entry, relocInfo::none); + __ delayed()->nop(); + //add for compressedoops + __ reinit_heapbase(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + __ li(T8, long(&BytecodeCounter::_counter_value)); + __ lw(T8, T8, 0); + __ move(AT, StopInterpreterAt); + __ bne(T8, AT, L); + __ delayed()->nop(); + __ brk(5); + __ delayed()->nop(); + __ bind(L); +} +#endif // !PRODUCT +#endif // ! CC_INTERP diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips.hpp b/hotspot/src/cpu/mips/vm/templateTable_mips.hpp new file mode 100644 index 00000000000..d879e6dc924 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateTable_mips.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + + static void prepare_invoke(Register method, Register index, int byte_no, + Bytecodes::Code code); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + static void volatile_barrier(); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp b/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp new file mode 100644 index 00000000000..7415511b99c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateTable_mips_64.cpp @@ -0,0 +1,4623 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.inline.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "utilities/macros.hpp" + + +#ifndef CC_INTERP + +#define __ _masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { + // No mips specific initialization +} + +// Address computation: local variables + +static inline Address iaddress(int n) { + return Address(LVP, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} +static inline Address haddress(int n) { return iaddress(n + 0); } + + +static inline Address at_sp() { return Address(SP, 0); } +static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } +static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } + +// At top of Java expression stack which may be different than sp(). It +// isn't for category 1 objects. 
+static inline Address at_tos () { + Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); + return tos; +} + +static inline Address at_tos_p1() { + return Address(SP, Interpreter::expr_offset_in_bytes(1)); +} + +static inline Address at_tos_p2() { + return Address(SP, Interpreter::expr_offset_in_bytes(2)); +} + +static inline Address at_tos_p3() { + return Address(SP, Interpreter::expr_offset_in_bytes(3)); +} + +// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(BCP, offset); +} + +// Miscelaneous helper routines +// Store an oop (or NULL) at the address described by obj. +// If val == noreg this means store a NULL + +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address obj, + Register val, + BarrierSet::Name barrier, + bool precise) { + assert(val == noreg || val == V0, "parameter is just for looks"); + switch (barrier) { +#if INCLUDE_ALL_GCS + case BarrierSet::G1SATBCT: + case BarrierSet::G1SATBCTLogging: + { + // flatten object address if needed + if (obj.index() == noreg && obj.disp() == 0) { + if (obj.base() != T3) { + __ move(T3, obj.base()); + } + } else { + __ lea(T3, obj); + } + __ g1_write_barrier_pre(T3 /* obj */, + T1 /* pre_val */, + TREG /* thread */, + T9 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + if (val == noreg) { + __ store_heap_oop_null(Address(T3, 0)); + } else { + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; + if (UseCompressedOops) { + new_val = T1; + __ move(new_val, val); + } + __ store_heap_oop(Address(T3, 0), val); + __ g1_write_barrier_post(T3 /* store_adr */, + new_val /* new_val */, + TREG /* thread */, + T9 /* tmp */, + T1 /* tmp2 */); + } + } + break; +#endif // INCLUDE_ALL_GCS + case BarrierSet::CardTableModRef: + case BarrierSet::CardTableExtension: + { + if (val == noreg) { + __ store_heap_oop_null(obj); + } else { + __ store_heap_oop(obj, val); + // flatten object address if needed + if (!precise || (obj.index() == noreg && obj.disp() == 0)) { + __ store_check(obj.base()); + } else { + __ lea(T9, obj); + __ store_check(T9); + } + } + } + break; + case BarrierSet::ModRef: + case BarrierSet::Other: + if (val == noreg) { + __ store_heap_oop_null(obj); + } else { + __ store_heap_oop(obj, val); + } + break; + default : + ShouldNotReachHere(); + + } +} + +// bytecode folding +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) { + if (!RewriteBytecodes) return; + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. 
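+      // The bytecode fetched from the cache below stays zero until the field is
+      // resolved, so the beq to L_patch_done leaves the slow bytecode in place.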
+ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); + __ daddiu(bc_reg, R0, bc); + __ beq(tmp_reg, R0, L_patch_done); + __ delayed()->nop(); + } + break; + default: + assert(byte_no == -1, "sanity"); + // the pair bytecodes have already done the load. + if (load_bc_into_bc_reg) { + __ move(bc_reg, bc); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + // if a breakpoint is present we can't rewrite the stream directly + __ lbu(tmp_reg, at_bcp(0)); + __ move(AT, Bytecodes::_breakpoint); + __ bne(tmp_reg, AT, L_fast_patch); + __ delayed()->nop(); + + __ get_method(tmp_reg); + // Let breakpoint table handling rewrite to quicker bytecode + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); + + __ b(L_patch_done); + __ delayed()->nop(); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Label L_okay; + __ lbu(tmp_reg, at_bcp(0)); + __ move(AT, (int)Bytecodes::java_code(bc)); + __ beq(tmp_reg, AT, L_okay); + __ delayed()->nop(); + __ beq(tmp_reg, bc_reg, L_patch_done); + __ delayed()->nop(); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // patch bytecode + __ sb(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} + + +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() { + transition(vtos, atos); + __ move(FSR, R0); +} + +void TemplateTable::iconst(int value) { + transition(vtos, itos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ move(FSR, value); + } +} + +void TemplateTable::lconst(int value) { + transition(vtos, ltos); + if (value == 0) { + __ move(FSR, R0); + } else { + __ move(FSR, value); + } +} + +void TemplateTable::fconst(int value) { + transition(vtos, ftos); + switch( value ) { + case 0: __ mtc1(R0, FSF); return; + case 1: __ addiu(AT, R0, 1); break; + case 2: __ addiu(AT, R0, 2); break; + default: ShouldNotReachHere(); + } + __ mtc1(AT, FSF); + __ cvt_s_w(FSF, FSF); +} + +void TemplateTable::dconst(int value) { + transition(vtos, dtos); + switch( value ) { + case 0: __ dmtc1(R0, FSF); + return; + case 1: __ daddiu(AT, R0, 1); + __ dmtc1(AT, FSF); + __ cvt_d_w(FSF, FSF); + break; + default: ShouldNotReachHere(); + } +} + +void TemplateTable::bipush() { + transition(vtos, itos); + __ lb(FSR, at_bcp(1)); +} + +void TemplateTable::sipush() { + transition(vtos, itos); + __ lb(FSR, BCP, 1); + __ lbu(AT, BCP, 2); + __ dsll(FSR, FSR, 8); + __ orr(FSR, FSR, AT); +} + +// T1 : tags +// T2 : index +// T3 : cpool +// T8 : tag +void TemplateTable::ldc(bool wide) { + transition(vtos, vtos); + Label call_ldc, notFloat, notClass, Done; + // get index in cpool + if (wide) { + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + } else { + __ lbu(T2, at_bcp(1)); + } + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { + __ gslbx(T1, T1, T2, tags_offset); + } else { + __ daddu(AT, T1, T2); + __ lb(T1, AT, tags_offset); + } + if(os::is_MP()) { + __ sync(); // load acquire + } + //now T1 is the tag + + // unresolved class - get the resolved 
class + __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); + __ beq(AT, R0, call_ldc); + __ delayed()->nop(); + + // unresolved class in error (resolution failed) - call into runtime + // so that the same error from first resolution attempt is thrown. + __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); + __ beq(AT, R0, call_ldc); + __ delayed()->nop(); + + // resolved class - need to call vm to get java mirror of the class + __ daddiu(AT, T1, - JVM_CONSTANT_Class); + __ bne(AT, R0, notClass); + __ delayed()->dsll(T2, T2, Address::times_8); + + __ bind(call_ldc); + __ move(A1, wide); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); + //__ push(atos); + __ daddiu(SP, SP, - Interpreter::stackElementSize); + __ b(Done); + __ delayed()->sd(FSR, SP, 0); // added for performance issue + + __ bind(notClass); + __ daddiu(AT, T1, -JVM_CONSTANT_Float); + __ bne(AT, R0, notFloat); + __ delayed()->nop(); + // ftos + if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { + __ gslwxc1(FSF, T3, T2, base_offset); + } else { + __ daddu(AT, T3, T2); + __ lwc1(FSF, AT, base_offset); + } + //__ push_f(); + __ daddiu(SP, SP, - Interpreter::stackElementSize); + __ b(Done); + __ delayed()->swc1(FSF, SP, 0); + + __ bind(notFloat); +#ifdef ASSERT + { + Label L; + __ daddiu(AT, T1, -JVM_CONSTANT_Integer); + __ beq(AT, R0, L); + __ delayed()->nop(); + __ stop("unexpected tag type in ldc"); + __ bind(L); + } +#endif + // itos JVM_CONSTANT_Integer only + if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { + __ gslwx(FSR, T3, T2, base_offset); + } else { + __ daddu(T0, T3, T2); + __ lw(FSR, T0, base_offset); + } + __ push(itos); + __ bind(Done); +} + +// Fast path for caching oop constants. +void TemplateTable::fast_aldc(bool wide) { + transition(vtos, atos); + + Register result = FSR; + Register tmp = SSR; + int index_size = wide ? sizeof(u2) : sizeof(u1); + + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (String, MethodType, etc.) 
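+  // A null entry means this is the first execution: fall through and let
+  // InterpreterRuntime::resolve_ldc fill the resolved reference cache.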
+ assert_different_registers(result, tmp); + __ get_cache_index_at_bcp(tmp, 1, index_size); + __ load_resolved_reference_at_index(result, tmp); + __ bne(result, R0, resolved); + __ delayed()->nop(); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + // first time invocation - must resolve first + int i = (int)bytecode(); + __ move(tmp, i); + __ call_VM(result, entry, tmp); + + __ bind(resolved); + + if (VerifyOops) { + __ verify_oop(result); + } +} + + +// used register: T2, T3, T1 +// T2 : index +// T3 : cpool +// T1 : tag +void TemplateTable::ldc2_w() { + transition(vtos, vtos); + Label Long, Done; + + // get index in cpool + __ get_unsigned_2_byte_index_at_bcp(T2, 1); + + __ get_cpool_and_tags(T3, T1); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type in T1 + if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { + __ gslbx(T1, T1, T2, tags_offset); + } else { + __ daddu(AT, T1, T2); + __ lb(T1, AT, tags_offset); + } + + __ daddiu(AT, T1, - JVM_CONSTANT_Double); + __ bne(AT, R0, Long); + __ delayed()->dsll(T2, T2, Address::times_8); + + // dtos + if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { + __ gsldxc1(FSF, T3, T2, base_offset); + } else { + __ daddu(AT, T3, T2); + __ ldc1(FSF, AT, base_offset); + } + __ push(dtos); + __ b(Done); + __ delayed()->nop(); + + // ltos + __ bind(Long); + if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { + __ gsldx(FSR, T3, T2, base_offset); + } else { + __ daddu(AT, T3, T2); + __ ld(FSR, AT, base_offset); + } + __ push(ltos); + + __ bind(Done); +} + +// we compute the actual local variable address here +// the x86 dont do so for it has scaled index memory access model, we dont have, so do here +void TemplateTable::locals_index(Register reg, int offset) { + __ lbu(reg, at_bcp(offset)); + __ dsll(reg, reg, Address::times_8); + __ dsubu(reg, LVP, reg); +} + +// this method will do bytecode folding of the two form: +// iload iload iload caload +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::iload() { + transition(vtos, itos); + if (RewriteFrequentPairs) { + Label rewrite, done; + // get the next bytecode in T2 + __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. + __ move(AT, Bytecodes::_iload); + __ beq(AT, T2, done); + __ delayed()->nop(); + + __ move(T3, Bytecodes::_fast_iload2); + __ move(AT, Bytecodes::_fast_iload); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // if _caload, rewrite to fast_icaload + __ move(T3, Bytecodes::_fast_icaload); + __ move(AT, Bytecodes::_caload); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // rewrite so iload doesn't check again. 
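+    // Default: the next bytecode is neither an iload nor a caload, so a plain
+    // _fast_iload is sufficient.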
+ __ move(T3, Bytecodes::_fast_iload); + + // rewrite + // T3 : fast bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, T3, T2, false); + __ bind(done); + } + + // Get the local value into tos + locals_index(T2); + __ lw(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload2() { + transition(vtos, itos); + locals_index(T2); + __ lw(FSR, T2, 0); + __ push(itos); + locals_index(T2, 3); + __ lw(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::fast_iload() { + transition(vtos, itos); + locals_index(T2); + __ lw(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::lload() { + transition(vtos, ltos); + locals_index(T2); + __ ld(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::fload() { + transition(vtos, ftos); + locals_index(T2); + __ lwc1(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::dload() { + transition(vtos, dtos); + locals_index(T2); + __ ldc1(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::aload() { + transition(vtos, atos); + locals_index(T2); + __ ld(FSR, T2, 0); +} + +void TemplateTable::locals_index_wide(Register reg) { + __ get_unsigned_2_byte_index_at_bcp(reg, 2); + __ dsll(reg, reg, Address::times_8); + __ dsubu(reg, LVP, reg); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(T2); + __ ld(FSR, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_lload() { + transition(vtos, ltos); + locals_index_wide(T2); + __ ld(FSR, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_fload() { + transition(vtos, ftos); + locals_index_wide(T2); + __ lwc1(FSF, T2, 0); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_dload() { + transition(vtos, dtos); + locals_index_wide(T2); + __ ldc1(FSF, T2, -wordSize); +} + +// used register T2 +// T2 : index +void TemplateTable::wide_aload() { + transition(vtos, atos); + locals_index_wide(T2); + __ ld(FSR, T2, 0); +} + +// we use A2 as the regiser for index, BE CAREFUL! 
+// we dont use our tge 29 now, for later optimization +void TemplateTable::index_check(Register array, Register index) { + // Pop ptr into array + __ pop_ptr(array); + index_check_without_pop(array, index); +} + +void TemplateTable::index_check_without_pop(Register array, Register index) { + // destroys A2 + // check array + __ null_check(array, arrayOopDesc::length_offset_in_bytes()); + + // sign extend since tos (index) might contain garbage in upper bits + __ sll(index, index, 0); + + // check index + Label ok; + __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); +#ifndef OPT_RANGECHECK + __ sltu(AT, index, AT); + __ bne(AT, R0, ok); + __ delayed()->nop(); + + //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 + if (A2 != index) __ move(A2, index); + __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ delayed()->nop(); + __ bind(ok); +#else + __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); + __ move(A2, index); + __ tgeu(A2, AT, 29); +#endif +} + +void TemplateTable::iaload() { + transition(itos, itos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ dsll(FSR, FSR, 2); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, 2); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); + + __ gslwle(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(FSR, FSR, 2); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_INT), 8)) { + __ gslwx(FSR, FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); + } else { + __ daddu(FSR, SSR, FSR); + __ lw(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); + } + } +} + +void TemplateTable::laload() { + transition(itos, ltos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ dsll(FSR, FSR, Address::times_8); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, Address::times_8); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); + + __ gsldle(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(AT, FSR, Address::times_8); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_LONG), 8)) { + __ gsldx(FSR, SSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG)); + } else { + __ daddu(AT, SSR, AT); + __ ld(FSR, AT, arrayOopDesc::base_offset_in_bytes(T_LONG)); + } + } +} + +void TemplateTable::faload() { + transition(itos, ftos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ shl(FSR, 2); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ shl(AT, 2); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + + __ gslwlec1(FSF, FSR, AT); + } else { + index_check(SSR, FSR); + __ shl(FSR, 2); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 8)) { + __ gslwxc1(FSF, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + } else { + __ daddu(FSR, SSR, FSR); + __ lwc1(FSF, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + } + } +} + +void TemplateTable::daload() { + transition(itos, dtos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array 
FSR: index + __ dsll(FSR, FSR, 3); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, 3); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); + + __ gsldlec1(FSF, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(AT, FSR, 3); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 8)) { + __ gsldxc1(FSF, SSR, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); + } else { + __ daddu(AT, SSR, AT); + __ ldc1(FSF, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); + } + } +} + +void TemplateTable::aaload() { + transition(itos, atos); + index_check(SSR, FSR); + __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); + __ daddu(FSR, SSR, FSR); + //add for compressedoops + __ load_heap_oop(FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +void TemplateTable::baload() { + transition(itos, itos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR:index + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound + + __ gslble(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_BYTE), 8)) { + __ gslbx(FSR, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } else { + __ daddu(FSR, SSR, FSR); + __ lb(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } + } +} + +void TemplateTable::caload() { + transition(itos, itos); + index_check(SSR, FSR); + __ dsll(FSR, FSR, Address::times_2); + __ daddu(FSR, SSR, FSR); + __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + +// iload followed by caload frequent pair +// used register : T2 +// T2 : index +void TemplateTable::fast_icaload() { + transition(vtos, itos); + // load index out of locals + locals_index(T2); + __ lw(FSR, T2, 0); + index_check(SSR, FSR); + __ dsll(FSR, FSR, 1); + __ daddu(FSR, SSR, FSR); + __ lhu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); +} + +void TemplateTable::saload() { + transition(itos, itos); + if(UseBoundCheckInstruction) { + __ pop(SSR); //SSR:array FSR: index + __ dsll(FSR, FSR, Address::times_2); + __ daddu(FSR, SSR, FSR); + __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); + + __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound + __ dsll(AT, AT, Address::times_2); + __ daddu(AT, SSR, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); + + __ gslhle(FSR, FSR, AT); + } else { + index_check(SSR, FSR); + __ dsll(FSR, FSR, Address::times_2); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_SHORT), 8)) { + __ gslhx(FSR, SSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); + } else { + __ daddu(FSR, SSR, FSR); + __ lh(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); + } + } +} + +void TemplateTable::iload(int n) { + transition(vtos, itos); + __ lw(FSR, iaddress(n)); +} + +void TemplateTable::lload(int n) { + transition(vtos, ltos); + __ ld(FSR, laddress(n)); +} + +void TemplateTable::fload(int n) { + transition(vtos, ftos); + __ lwc1(FSF, faddress(n)); +} + +void TemplateTable::dload(int n) { + transition(vtos, dtos); + __ ldc1(FSF, laddress(n)); +} + +void 
TemplateTable::aload(int n) { + transition(vtos, atos); + __ ld(FSR, aaddress(n)); +} + +// used register : T2, T3 +// T2 : bytecode +// T3 : folded code +void TemplateTable::aload_0() { + transition(vtos, atos); + // According to bytecode histograms, the pairs: + // + // _aload_0, _fast_igetfield + // _aload_0, _fast_agetfield + // _aload_0, _fast_fgetfield + // + // occur frequently. If RewriteFrequentPairs is set, the (slow) + // _aload_0 bytecode checks if the next bytecode is either + // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then + // rewrites the current bytecode into a pair bytecode; otherwise it + // rewrites the current bytecode into _fast_aload_0 that doesn't do + // the pair check anymore. + // + // Note: If the next bytecode is _getfield, the rewrite must be + // delayed, otherwise we may miss an opportunity for a pair. + // + // Also rewrite frequent pairs + // aload_0, aload_1 + // aload_0, iload_1 + // These bytecodes with a small amount of code are most profitable + // to rewrite + if (RewriteFrequentPairs) { + Label rewrite, done; + // get the next bytecode in T2 + __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // do actual aload_0 + aload(0); + + // if _getfield then wait with rewrite + __ move(AT, Bytecodes::_getfield); + __ beq(AT, T2, done); + __ delayed()->nop(); + + // if _igetfield then reqrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_iaccess_0); + __ move(AT, Bytecodes::_fast_igetfield); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // if _agetfield then reqrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_aaccess_0); + __ move(AT, Bytecodes::_fast_agetfield); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // if _fgetfield then reqrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_faccess_0); + __ move(AT, Bytecodes::_fast_fgetfield); + __ beq(AT, T2, rewrite); + __ delayed()->nop(); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == + Bytecodes::_aload_0, + "fix bytecode definition"); + __ move(T3, Bytecodes::_fast_aload_0); + + // rewrite + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, T3, T2, false); + + __ bind(done); + } else { + aload(0); + } +} + +void TemplateTable::istore() { + transition(itos, vtos); + locals_index(T2); + __ sw(FSR, T2, 0); +} + +void TemplateTable::lstore() { + transition(ltos, vtos); + locals_index(T2); + __ sd(FSR, T2, -wordSize); +} + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(T2); + __ swc1(FSF, T2, 0); +} + +void TemplateTable::dstore() { + transition(dtos, vtos); + locals_index(T2); + __ sdc1(FSF, T2, -wordSize); +} + +void TemplateTable::astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index(T2); + __ sd(FSR, T2, 0); +} + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(FSR); + locals_index_wide(T2); + __ sd(FSR, T2, 0); +} + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(FSR); + locals_index_wide(T2); + __ sd(FSR, T2, -wordSize); +} + +void TemplateTable::wide_fstore() { + wide_istore(); +} + +void TemplateTable::wide_dstore() { + wide_lstore(); +} + +void 
TemplateTable::wide_astore() { + transition(vtos, vtos); + __ pop_ptr(FSR); + locals_index_wide(T2); + __ sd(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(SSR); // T2: array SSR: index + if(UseBoundCheckInstruction) { + __ pop_ptr(T2); + __ dsll(SSR, SSR, Address::times_4); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_4); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound + + __ gsswle(FSR, SSR, AT); + } else { + index_check(T2, SSR); // prefer index in SSR + __ dsll(SSR, SSR, Address::times_4); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_INT), 8)) { + __ gsswx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); + } else { + __ daddu(T2, T2, SSR); + __ sw(FSR, T2, arrayOopDesc::base_offset_in_bytes(T_INT)); + } + } +} + + + +// used register T2, T3 +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i (T2); + if(UseBoundCheckInstruction) { + __ pop_ptr(T3); + __ dsll(T2, T2, Address::times_8); + __ daddu(T2, T3, T2); + __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base + + __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_8); + __ daddu(AT, T3, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound + + __ gssdle(FSR, T2, AT); + } else { + index_check(T3, T2); + __ dsll(T2, T2, Address::times_8); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_LONG), 8)) { + __ gssdx(FSR, T3, T2, arrayOopDesc::base_offset_in_bytes(T_LONG)); + } else { + __ daddu(T3, T3, T2); + __ sd(FSR, T3, arrayOopDesc::base_offset_in_bytes(T_LONG)); + } + } +} + +// used register T2 +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(SSR); + if(UseBoundCheckInstruction) { + __ pop_ptr(T2); + __ dsll(SSR, SSR, Address::times_4); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_4); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound + + __ gsswlec1(FSF, SSR, AT); + } else { + index_check(T2, SSR); + __ dsll(SSR, SSR, Address::times_4); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_FLOAT), 8)) { + __ gsswxc1(FSF, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + } else { + __ daddu(T2, T2, SSR); + __ swc1(FSF, T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); + } + } +} + +// used register T2, T3 +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i (T2); + if(UseBoundCheckInstruction) { + __ pop_ptr(T3); + __ dsll(T2, T2, Address::times_8); + __ daddu(T2, T3, T2); + __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base + + __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_8); + __ daddu(AT, T3, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound + + __ gssdlec1(FSF, T2, AT); + } else { + index_check(T3, T2); + __ dsll(T2, T2, Address::times_8); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_DOUBLE), 8)) { + __ gssdxc1(FSF, T3, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); + } else { + __ 
daddu(T3, T3, T2); + __ sdc1(FSF, T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)); + } + } +} + +// used register : T2, T3, T8 +// T2 : array +// T3 : subklass +// T8 : supklass +void TemplateTable::aastore() { + Label is_null, ok_is_subtype, done; + transition(vtos, vtos); + // stack: ..., array, index, value + __ ld(FSR, at_tos()); // Value + __ lw(SSR, at_tos_p1()); // Index + __ ld(T2, at_tos_p2()); // Array + + // index_check(T2, SSR); + index_check_without_pop(T2, SSR); + // do array store check - check for NULL value first + __ beq(FSR, R0, is_null); + __ delayed()->nop(); + + // Move subklass into T3 + //add for compressedoops + __ load_klass(T3, FSR); + // Move superklass into T8 + //add for compressedoops + __ load_klass(T8, T2); + __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); + // Compress array+index*4+12 into a single register. T2 + __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); + __ daddu(T2, T2, AT); + __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + // Generate subtype check. + // Superklass in T8. Subklass in T3. + __ gen_subtype_check(T8, T3, ok_is_subtype); + // Come here on failure + // object is at FSR + __ jmp(Interpreter::_throw_ArrayStoreException_entry); + __ delayed()->nop(); + // Come here on success + __ bind(ok_is_subtype); + do_oop_store(_masm, Address(T2, 0), FSR, _bs->kind(), true); + __ b(done); + __ delayed()->nop(); + + // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] + __ bind(is_null); + __ profile_null_seen(T9); + __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); + __ daddu(T2, T2, AT); + do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, _bs->kind(), true); + + __ bind(done); + __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); +} + +void TemplateTable::bastore() { + transition(itos, vtos); + __ pop_i(SSR); + if(UseBoundCheckInstruction) { + guarantee(false, "unimplemented yet!"); + __ pop_ptr(T2); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound + + __ gssble(FSR, SSR, AT); + } else { + index_check(T2, SSR); + + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. 
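+    // If the layout_helper diffbit is set this is a boolean array, so keep only
+    // the low bit of the value before storing.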
+ __ load_klass(T9, T2); + __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); + + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ move(AT, diffbit); + + Label L_skip; + __ andr(AT, T9, AT); + __ beq(AT, R0, L_skip); + __ delayed()->nop(); + __ andi(FSR, FSR, 0x1); + __ bind(L_skip); + + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_BYTE), 8)) { + __ gssbx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } else { + __ daddu(SSR, T2, SSR); + __ sb(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } + } +} + +void TemplateTable::castore() { + transition(itos, vtos); + __ pop_i(SSR); + if(UseBoundCheckInstruction) { + __ pop_ptr(T2); + __ dsll(SSR, SSR, Address::times_2); + __ daddu(SSR, T2, SSR); + __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base + + __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); + __ dsll(AT, AT, Address::times_2); + __ daddu(AT, T2, AT); + __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound + + __ gsshle(FSR, SSR, AT); + } else { + index_check(T2, SSR); + __ dsll(SSR, SSR, Address::times_2); + if (UseLEXT1 && Assembler::is_simm(arrayOopDesc::base_offset_in_bytes(T_CHAR), 8)) { + __ gsshx(FSR, T2, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); + } else { + __ daddu(SSR, T2, SSR); + __ sh(FSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); + } + } +} + +void TemplateTable::sastore() { + castore(); +} + +void TemplateTable::istore(int n) { + transition(itos, vtos); + __ sw(FSR, iaddress(n)); +} + +void TemplateTable::lstore(int n) { + transition(ltos, vtos); + __ sd(FSR, laddress(n)); +} + +void TemplateTable::fstore(int n) { + transition(ftos, vtos); + __ swc1(FSF, faddress(n)); +} + +void TemplateTable::dstore(int n) { + transition(dtos, vtos); + __ sdc1(FSF, laddress(n)); +} + +void TemplateTable::astore(int n) { + transition(vtos, vtos); + __ pop_ptr(FSR); + __ sd(FSR, aaddress(n)); +} + +void TemplateTable::pop() { + transition(vtos, vtos); + __ daddiu(SP, SP, Interpreter::stackElementSize); +} + +void TemplateTable::pop2() { + transition(vtos, vtos); + __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); +} + +void TemplateTable::dup() { + transition(vtos, vtos); + // stack: ..., a + __ load_ptr(0, FSR); + __ push_ptr(FSR); + // stack: ..., a, a +} + +// blows FSR +void TemplateTable::dup_x1() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(0, FSR); // load b + __ load_ptr(1, A5); // load a + __ store_ptr(1, FSR); // store b + __ store_ptr(0, A5); // store a + __ push_ptr(FSR); // push b + // stack: ..., b, a, b +} + +// blows FSR +void TemplateTable::dup_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, FSR); // load c + __ load_ptr(2, A5); // load a + __ store_ptr(2, FSR); // store c in a + __ push_ptr(FSR); // push c + // stack: ..., c, b, c, c + __ load_ptr(2, FSR); // load b + __ store_ptr(2, A5); // store a in b + // stack: ..., c, a, c, c + __ store_ptr(1, FSR); // store b in c + // stack: ..., c, a, b, c +} + +// blows FSR +void TemplateTable::dup2() { + transition(vtos, vtos); + // stack: ..., a, b + __ load_ptr(1, FSR); // load a + __ push_ptr(FSR); // push a + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + // stack: ..., a, b, a, b +} + +// blows FSR +void TemplateTable::dup2_x1() { + transition(vtos, vtos); + // stack: ..., a, b, c + __ load_ptr(0, T2); // load c + __ load_ptr(1, FSR); // load b + __ push_ptr(FSR); // push b + __ push_ptr(T2); // push c + // stack: ..., a, b, c, b, c + __ 
store_ptr(3, T2); // store c in b + // stack: ..., a, c, c, b, c + __ load_ptr(4, T2); // load a + __ store_ptr(2, T2); // store a in 2nd c + // stack: ..., a, c, a, b, c + __ store_ptr(4, FSR); // store b in a + // stack: ..., b, c, a, b, c + + // stack: ..., b, c, a, b, c +} + +// blows FSR, SSR +void TemplateTable::dup2_x2() { + transition(vtos, vtos); + // stack: ..., a, b, c, d + // stack: ..., a, b, c, d + __ load_ptr(0, T2); // load d + __ load_ptr(1, FSR); // load c + __ push_ptr(FSR); // push c + __ push_ptr(T2); // push d + // stack: ..., a, b, c, d, c, d + __ load_ptr(4, FSR); // load b + __ store_ptr(2, FSR); // store b in d + __ store_ptr(4, T2); // store d in b + // stack: ..., a, d, c, b, c, d + __ load_ptr(5, T2); // load a + __ load_ptr(3, FSR); // load c + __ store_ptr(3, T2); // store a in c + __ store_ptr(5, FSR); // store c in a + // stack: ..., c, d, a, b, c, d + + // stack: ..., c, d, a, b, c, d +} + +// blows FSR +void TemplateTable::swap() { + transition(vtos, vtos); + // stack: ..., a, b + + __ load_ptr(1, A5); // load a + __ load_ptr(0, FSR); // load b + __ store_ptr(0, A5); // store a in b + __ store_ptr(1, FSR); // store b in a + + // stack: ..., b, a +} + +void TemplateTable::iop2(Operation op) { + transition(itos, itos); + + __ pop_i(SSR); + switch (op) { + case add : __ addu32(FSR, SSR, FSR); break; + case sub : __ subu32(FSR, SSR, FSR); break; + case mul : __ mul(FSR, SSR, FSR); break; + case _and : __ andr(FSR, SSR, FSR); break; + case _or : __ orr(FSR, SSR, FSR); break; + case _xor : __ xorr(FSR, SSR, FSR); break; + case shl : __ sllv(FSR, SSR, FSR); break; + case shr : __ srav(FSR, SSR, FSR); break; + case ushr : __ srlv(FSR, SSR, FSR); break; + default : ShouldNotReachHere(); + } +} + +// the result stored in FSR, SSR, +// used registers : T2, T3 +void TemplateTable::lop2(Operation op) { + transition(ltos, ltos); + __ pop_l(T2); + + switch (op) { + case add : __ daddu(FSR, T2, FSR); break; + case sub : __ dsubu(FSR, T2, FSR); break; + case _and: __ andr(FSR, T2, FSR); break; + case _or : __ orr(FSR, T2, FSR); break; + case _xor: __ xorr(FSR, T2, FSR); break; + default : ShouldNotReachHere(); + } +} + +// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, +// the result is 0x80000000 +// the godson2 cpu do the same, so we need not handle this specially like x86 +void TemplateTable::idiv() { + transition(itos, itos); + Label not_zero; + + __ bne(FSR, R0, not_zero); + __ delayed()->nop(); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + __ bind(not_zero); + + __ pop_i(SSR); + if (UseLEXT1) { + __ gsdiv(FSR, SSR, FSR); + } else { + __ div(SSR, FSR); + __ mflo(FSR); + } +} + +void TemplateTable::irem() { + transition(itos, itos); + Label not_zero; + __ pop_i(SSR); + __ div(SSR, FSR); + + __ bne(FSR, R0, not_zero); + __ delayed()->nop(); + //__ brk(7); + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + + __ bind(not_zero); + __ mfhi(FSR); +} + +void TemplateTable::lmul() { + transition(ltos, ltos); + __ pop_l(T2); + if (UseLEXT1) { + __ gsdmult(FSR, T2, FSR); + } else { + __ dmult(T2, FSR); + __ mflo(FSR); + } +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::ldiv() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + __ delayed()->nop(); + + //__ brk(7); //generate FPE + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + + __ bind(normal); + __ pop_l(A2); + if 
(UseLEXT1) { + __ gsddiv(FSR, A2, FSR); + } else { + __ ddiv(A2, FSR); + __ mflo(FSR); + } +} + +// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry +void TemplateTable::lrem() { + transition(ltos, ltos); + Label normal; + + __ bne(FSR, R0, normal); + __ delayed()->nop(); + + __ jmp(Interpreter::_throw_ArithmeticException_entry); + __ delayed()->nop(); + + __ bind(normal); + __ pop_l (A2); + + if (UseLEXT1) { + __ gsdmod(FSR, A2, FSR); + } else { + __ ddiv(A2, FSR); + __ mfhi(FSR); + } +} + +// result in FSR +// used registers : T0 +void TemplateTable::lshl() { + transition(itos, ltos); + __ pop_l(T0); + __ dsllv(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lshr() { + transition(itos, ltos); + __ pop_l(T0); + __ dsrav(FSR, T0, FSR); +} + +// used registers : T0 +void TemplateTable::lushr() { + transition(itos, ltos); + __ pop_l(T0); + __ dsrlv(FSR, T0, FSR); +} + +// result in FSF +void TemplateTable::fop2(Operation op) { + transition(ftos, ftos); + switch (op) { + case add: + __ lwc1(FTF, at_sp()); + __ add_s(FSF, FTF, FSF); + break; + case sub: + __ lwc1(FTF, at_sp()); + __ sub_s(FSF, FTF, FSF); + break; + case mul: + __ lwc1(FTF, at_sp()); + __ mul_s(FSF, FTF, FSF); + break; + case div: + __ lwc1(FTF, at_sp()); + __ div_s(FSF, FTF, FSF); + break; + case rem: + __ mov_s(F13, FSF); + __ lwc1(F12, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); + break; + default : ShouldNotReachHere(); + } + + __ daddiu(SP, SP, 1 * wordSize); +} + +// result in SSF||FSF +// i dont handle the strict flags +void TemplateTable::dop2(Operation op) { + transition(dtos, dtos); + switch (op) { + case add: + __ ldc1(FTF, at_sp()); + __ add_d(FSF, FTF, FSF); + break; + case sub: + __ ldc1(FTF, at_sp()); + __ sub_d(FSF, FTF, FSF); + break; + case mul: + __ ldc1(FTF, at_sp()); + __ mul_d(FSF, FTF, FSF); + break; + case div: + __ ldc1(FTF, at_sp()); + __ div_d(FSF, FTF, FSF); + break; + case rem: + __ mov_d(F13, FSF); + __ ldc1(F12, at_sp()); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); + break; + default : ShouldNotReachHere(); + } + + __ daddiu(SP, SP, 2 * wordSize); +} + +void TemplateTable::ineg() { + transition(itos, itos); + __ subu32(FSR, R0, FSR); +} + +void TemplateTable::lneg() { + transition(ltos, ltos); + __ dsubu(FSR, R0, FSR); +} + +void TemplateTable::fneg() { + transition(ftos, ftos); + __ neg_s(FSF, FSF); +} + +void TemplateTable::dneg() { + transition(dtos, dtos); + __ neg_d(FSF, FSF); +} + +// used registers : T2 +void TemplateTable::iinc() { + transition(vtos, vtos); + locals_index(T2); + __ lw(FSR, T2, 0); + __ lb(AT, at_bcp(2)); // get constant + __ daddu(FSR, FSR, AT); + __ sw(FSR, T2, 0); +} + +// used register : T2 +void TemplateTable::wide_iinc() { + transition(vtos, vtos); + locals_index_wide(T2); + __ get_2_byte_integer_at_bcp(FSR, AT, 4); + __ hswap(FSR); + __ lw(AT, T2, 0); + __ daddu(FSR, AT, FSR); + __ sw(FSR, T2, 0); +} + +void TemplateTable::convert() { + // Checking +#ifdef ASSERT + { + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: 
// fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + } +#endif // ASSERT + + // Conversion + switch (bytecode()) { + case Bytecodes::_i2l: + __ sll(FSR, FSR, 0); + break; + case Bytecodes::_i2f: + __ mtc1(FSR, FSF); + __ cvt_s_w(FSF, FSF); + break; + case Bytecodes::_i2d: + __ mtc1(FSR, FSF); + __ cvt_d_w(FSF, FSF); + break; + case Bytecodes::_i2b: + __ seb(FSR, FSR); + break; + case Bytecodes::_i2c: + __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits + break; + case Bytecodes::_i2s: + __ seh(FSR, FSR); + break; + case Bytecodes::_l2i: + __ sll(FSR, FSR, 0); + break; + case Bytecodes::_l2f: + __ dmtc1(FSR, FSF); + __ cvt_s_l(FSF, FSF); + break; + case Bytecodes::_l2d: + __ dmtc1(FSR, FSF); + __ cvt_d_l(FSF, FSF); + break; + case Bytecodes::_f2i: + { + Label L; + + __ trunc_w_s(F12, FSF); + __ move(AT, 0x7fffffff); + __ mfc1(FSR, F12); + __ c_un_s(FSF, FSF); //NaN? + __ movt(FSR, R0); + + __ bne(AT, FSR, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, FSF); + __ andr(AT, AT, T9); + + __ movn(FSR, T9, AT); + + __ bind(L); + } + break; + case Bytecodes::_f2l: + { + Label L; + + __ trunc_l_s(F12, FSF); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(FSR, F12); + __ c_un_s(FSF, FSF); //NaN? + __ movt(FSR, R0); + + __ bne(AT, FSR, L); + __ delayed()->lui(T9, 0x8000); + + __ mfc1(AT, FSF); + __ andr(AT, AT, T9); + + __ dsll32(T9, T9, 0); + __ movn(FSR, T9, AT); + + __ bind(L); + } + break; + case Bytecodes::_f2d: + __ cvt_d_s(FSF, FSF); + break; + case Bytecodes::_d2i: + { + Label L; + + __ trunc_w_d(F12, FSF); + __ move(AT, 0x7fffffff); + __ mfc1(FSR, F12); + + __ bne(FSR, AT, L); + __ delayed()->mtc1(R0, F12); + + __ cvt_d_w(F12, F12); + __ c_ult_d(FSF, F12); + __ bc1f(L); + __ delayed()->addiu(T9, R0, -1); + + __ c_un_d(FSF, FSF); //NaN? + __ subu32(FSR, T9, AT); + __ movt(FSR, R0); + + __ bind(L); + } + break; + case Bytecodes::_d2l: + { + Label L; + + __ trunc_l_d(F12, FSF); + __ daddiu(AT, R0, -1); + __ dsrl(AT, AT, 1); + __ dmfc1(FSR, F12); + + __ bne(FSR, AT, L); + __ delayed()->mtc1(R0, F12); + + __ cvt_d_w(F12, F12); + __ c_ult_d(FSF, F12); + __ bc1f(L); + __ delayed()->daddiu(T9, R0, -1); + + __ c_un_d(FSF, FSF); //NaN? 
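+        // c_un_d set the FP condition flag for a NaN input; movt then forces the
+        // result to 0, as required for d2l of NaN.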
+ __ subu(FSR, T9, AT); + __ movt(FSR, R0); + + __ bind(L); + } + break; + case Bytecodes::_d2f: + __ cvt_s_d(FSF, FSF); + break; + default : + ShouldNotReachHere(); + } +} + +void TemplateTable::lcmp() { + transition(ltos, itos); + + Label low, high, done; + __ pop(T0); + __ pop(R0); + __ slt(AT, T0, FSR); + __ bne(AT, R0, low); + __ delayed()->nop(); + + __ bne(T0, FSR, high); + __ delayed()->nop(); + + __ li(FSR, (long)0); + __ b(done); + __ delayed()->nop(); + + __ bind(low); + __ li(FSR, (long)-1); + __ b(done); + __ delayed()->nop(); + + __ bind(high); + __ li(FSR, (long)1); + __ b(done); + __ delayed()->nop(); + + __ bind(done); +} + +void TemplateTable::float_cmp(bool is_float, int unordered_result) { + Label less, done; + + __ move(FSR, R0); + + if (is_float) { + __ lwc1(FTF, at_sp()); + __ c_eq_s(FTF, FSF); + __ bc1t(done); + __ delayed()->daddiu(SP, SP, 1 * wordSize); + + if (unordered_result<0) + __ c_ult_s(FTF, FSF); + else + __ c_olt_s(FTF, FSF); + } else { + __ ldc1(FTF, at_sp()); + __ c_eq_d(FTF, FSF); + __ bc1t(done); + __ delayed()->daddiu(SP, SP, 2 * wordSize); + + if (unordered_result<0) + __ c_ult_d(FTF, FSF); + else + __ c_olt_d(FTF, FSF); + } + __ bc1t(less); + __ delayed()->nop(); + __ move(FSR, 1); + __ b(done); + __ delayed()->nop(); + __ bind(less); + __ move(FSR, -1); + __ bind(done); +} + + +// used registers : T3, A7, Rnext +// FSR : return bci, this is defined by the vm specification +// T2 : MDO taken count +// T3 : method +// A7 : offset +// Rnext : next bytecode, this is required by dispatch_base +void TemplateTable::branch(bool is_jsr, bool is_wide) { + __ get_method(T3); + __ profile_taken_branch(A7, T2); // only C2 meaningful + + const ByteSize be_offset = MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset(); + const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset(); + + // Load up T4 with the branch displacement + if (!is_wide) { + __ lb(A7, BCP, 1); + __ lbu(AT, BCP, 2); + __ dsll(A7, A7, 8); + __ orr(A7, A7, AT); + } else { + __ get_4_byte_integer_at_bcp(A7, AT, 1); + __ swap(A7); + } + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the non-JSR + // normal-branch stuff occuring below. 
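+  // At this point A7 holds the signed branch displacement and BCP still points
+  // at the branch bytecode.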
+ if (is_jsr) { + // Pre-load the next target bytecode into Rnext + __ daddu(AT, BCP, A7); + __ lbu(Rnext, AT, 0); + + // compute return address as bci in FSR + __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); + __ ld(AT, T3, in_bytes(Method::const_offset())); + __ dsubu(FSR, FSR, AT); + // Adjust the bcp in BCP by the displacement in A7 + __ daddu(BCP, BCP, A7); + // jsr returns atos that is not an oop + // Push return address + __ push_i(FSR); + // jsr returns vtos + __ dispatch_only_noverify(vtos); + + return; + } + + // Normal (non-jsr) branch handling + + // Adjust the bcp in S0 by the displacement in T4 + __ daddu(BCP, BCP, A7); + + assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; + Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches + // T3: method + // T4: target offset + // BCP: target bcp + // LVP: locals pointer + __ bgtz(A7, dispatch); // check if forward or backward branch + __ delayed()->nop(); + + // check if MethodCounters exists + Label has_counters; + __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ bne(AT, R0, has_counters); + __ delayed()->nop(); + __ push(T3); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), + T3); + __ pop(T3); + __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP + __ beq(AT, R0, dispatch); + __ delayed()->nop(); + __ bind(has_counters); + + if (TieredCompilation) { + Label no_mdo; + int increment = InvocationCounter::count_increment; + int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + if (ProfileInterpreter) { + // Are we profiling? 
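+        // T3 holds the Method*; a null MDO means no profiling data has been
+        // allocated for this method yet.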
+ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); + __ beq(T0, R0, no_mdo); + __ delayed()->nop(); + // Increment the MDO backedge counter + const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + __ beq(R0, R0, dispatch); + __ delayed()->nop(); + } + __ bind(no_mdo); + // Increment backedge counter in MethodCounters* + __ ld(T0, Address(T3, Method::method_counters_offset())); + __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, + T1, false, Assembler::zero, &backedge_counter_overflow); + if (!UseOnStackReplacement) { + __ bind(backedge_counter_overflow); + } + } else { + // increment back edge counter + __ ld(T1, T3, in_bytes(Method::method_counters_offset())); + __ lw(T0, T1, in_bytes(be_offset)); + __ increment(T0, InvocationCounter::count_increment); + __ sw(T0, T1, in_bytes(be_offset)); + + // load invocation counter + __ lw(T1, T1, in_bytes(inv_offset)); + // buffer bit added, mask no needed + + // daddu backedge counter & invocation counter + __ daddu(T1, T1, T0); + + if (ProfileInterpreter) { + // Test to see if we should create a method data oop + // T1 : backedge counter & invocation counter + if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { + __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); + __ lw(AT, AT, 0); + __ slt(AT, T1, AT); + } + + __ bne(AT, R0, dispatch); + __ delayed()->nop(); + + // if no method data exists, go to profile method + __ test_method_data_pointer(T1, profile_method); + + if (UseOnStackReplacement) { + if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { + __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); + } else { + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ lw(AT, AT, 0); + __ slt(AT, T2, AT); + } + + __ bne(AT, R0, dispatch); + __ delayed()->nop(); + + // When ProfileInterpreter is on, the backedge_count comes + // from the methodDataOop, which value does not get reset on + // the call to frequency_counter_overflow(). + // To avoid excessive calls to the overflow routine while + // the method is being compiled, daddu a second test to make + // sure the overflow function is called only once every + // overflow_frequency. + const int overflow_frequency = 1024; + __ andi(AT, T2, overflow_frequency-1); + __ beq(AT, R0, backedge_counter_overflow); + __ delayed()->nop(); + } + } else { + if (UseOnStackReplacement) { + // check for overflow against AT, which is the sum of the counters + __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); + __ lw(AT, AT, 0); + __ slt(AT, T1, AT); + __ beq(AT, R0, backedge_counter_overflow); + __ delayed()->nop(); + } + } + } + __ bind(dispatch); + } + + // Pre-load the next target bytecode into Rnext + __ lbu(Rnext, BCP, 0); + + // continue with the bytecode @ target + // FSR: return bci for jsr's, unused otherwise + // Rnext: target bytecode + // BCP: target bcp + __ dispatch_only(vtos); + + if (UseLoopCounter) { + if (ProfileInterpreter) { + // Out-of-line code to allocate method data oop. 
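+      // Reached from test_method_data_pointer above when the profile limit was
+      // hit but no MDO exists yet.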
+ __ bind(profile_method); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ lbu(Rnext, BCP, 0); + __ set_method_data_pointer_for_bcp(); + __ b(dispatch); + __ delayed()->nop(); + } + + if (UseOnStackReplacement) { + // invocation counter overflow + __ bind(backedge_counter_overflow); + __ subu(A7, BCP, A7); // branch bcp + call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), A7); + __ lbu(Rnext, BCP, 0); + + // V0: osr nmethod (osr ok) or NULL (osr not possible) + // V1: osr adapter frame return address + // Rnext: target bytecode + // LVP: locals pointer + // BCP: bcp + __ beq(V0, R0, dispatch); + __ delayed()->nop(); + // nmethod may have been invalidated (VM may block upon call_VM return) + __ lw(T3, V0, nmethod::entry_bci_offset()); + __ move(AT, InvalidOSREntryBci); + __ beq(AT, T3, dispatch); + __ delayed()->nop(); + // We need to prepare to execute the OSR method. First we must + // migrate the locals and monitors off of the stack. + //V0: osr nmethod (osr ok) or NULL (osr not possible) + //V1: osr adapter frame return address + //Rnext: target bytecode + //LVP: locals pointer + //BCP: bcp + __ move(BCP, V0); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + + // V0 is OSR buffer, move it to expected parameter location + // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp + __ move(T0, V0); + + // pop the interpreter frame + __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); + //FIXME, shall we keep the return address on the stack? + __ leave(); // remove frame anchor + __ move(LVP, RA); + __ move(SP, A7); + + __ move(AT, -(StackAlignmentInBytes)); + __ andr(SP , SP , AT); + + // push the (possibly adjusted) return address + //refer to osr_entry in c1_LIRAssembler_mips.cpp + __ ld(AT, BCP, nmethod::osr_entry_point_offset()); + __ jr(AT); + __ delayed()->nop(); + } + } +} + + +void TemplateTable::if_0cmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + case less: + __ bgez(FSR, not_taken); + break; + case less_equal: + __ bgtz(FSR, not_taken); + break; + case greater: + __ blez(FSR, not_taken); + break; + case greater_equal: + __ bltz(FSR, not_taken); + break; + } + __ delayed()->nop(); + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_icmp(Condition cc) { + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + + __ pop_i(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + case less: + __ slt(AT, SSR, FSR); + __ beq(AT, R0, not_taken); + break; + case less_equal: + __ slt(AT, FSR, SSR); + __ bne(AT, R0, not_taken); + break; + case greater: + __ slt(AT, FSR, SSR); + __ beq(AT, R0, not_taken); + break; + case greater_equal: + __ slt(AT, SSR, FSR); + __ bne(AT, R0, not_taken); + break; + } + __ delayed()->nop(); + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +void TemplateTable::if_nullcmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use 
backward branches) + Label not_taken; + switch(cc) { + case not_equal: + __ beq(FSR, R0, not_taken); + break; + case equal: + __ bne(FSR, R0, not_taken); + break; + default: + ShouldNotReachHere(); + } + __ delayed()->nop(); + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + + +void TemplateTable::if_acmp(Condition cc) { + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + // __ lw(SSR, SP, 0); + __ pop_ptr(SSR); + switch(cc) { + case not_equal: + __ beq(SSR, FSR, not_taken); + break; + case equal: + __ bne(SSR, FSR, not_taken); + break; + default: + ShouldNotReachHere(); + } + __ delayed()->nop(); + + branch(false, false); + + __ bind(not_taken); + __ profile_not_taken_branch(FSR); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci +void TemplateTable::ret() { + transition(vtos, vtos); + + locals_index(T2); + __ ld(T2, T2, 0); + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld(BCP, T1, in_bytes(Method::const_offset())); + __ daddu(BCP, BCP, T2); + __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos); +} + +// used registers : T1, T2, T3 +// T1 : method +// T2 : returb bci +void TemplateTable::wide_ret() { + transition(vtos, vtos); + + locals_index_wide(T2); + __ ld(T2, T2, 0); // get return bci, compute return bcp + __ profile_ret(T2, T3); + + __ get_method(T1); + __ ld(BCP, T1, in_bytes(Method::const_offset())); + __ daddu(BCP, BCP, T2); + __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); + + __ dispatch_next(vtos); +} + +// used register T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : low +// A7 : high +// Rnext : dest bytecode, required by dispatch_base +void TemplateTable::tableswitch() { + Label default_case, continue_execution; + transition(itos, vtos); + + // align BCP + __ daddiu(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // load lo & hi + __ lw(T3, T2, 1 * BytesPerInt); + __ swap(T3); + __ lw(A7, T2, 2 * BytesPerInt); + __ swap(A7); + + // check against lo & hi + __ slt(AT, FSR, T3); + __ bne(AT, R0, default_case); + __ delayed()->nop(); + + __ slt(AT, A7, FSR); + __ bne(AT, R0, default_case); + __ delayed()->nop(); + + // lookup dispatch offset, in A7 big endian + __ dsubu(FSR, FSR, T3); + __ dsll(AT, FSR, Address::times_4); + __ daddu(AT, T2, AT); + __ lw(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(FSR, T9, T3); + + __ bind(continue_execution); + __ swap(A7); + __ daddu(BCP, BCP, A7); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos); + + // handle default + __ bind(default_case); + __ profile_switch_default(FSR); + __ lw(A7, T2, 0); + __ b(continue_execution); + __ delayed()->nop(); +} + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +// used registers : T2, T3, A7, Rnext +// T2 : bytecode pointer +// T3 : pair index +// A7 : offset +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + + // swap FSR so we can avoid swapping the table entries + __ swap(FSR); + + // align BCP + __ daddiu(T2, BCP, BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(T2, T2, AT); + + // set counter + __ lw(T3, T2, BytesPerInt); + __ swap(T3); + __ b(loop_entry); + __ delayed()->nop(); + + 
// table search + __ bind(loop); + // get the entry value + __ dsll(AT, T3, Address::times_8); + __ daddu(AT, T2, AT); + __ lw(AT, AT, 2 * BytesPerInt); + + // found? + __ beq(FSR, AT, found); + __ delayed()->nop(); + + __ bind(loop_entry); + __ bgtz(T3, loop); + __ delayed()->daddiu(T3, T3, -1); + + // default case + __ profile_switch_default(FSR); + __ lw(A7, T2, 0); + __ b(continue_execution); + __ delayed()->nop(); + + // entry found -> get offset + __ bind(found); + __ dsll(AT, T3, Address::times_8); + __ daddu(AT, T2, AT); + __ lw(A7, AT, 3 * BytesPerInt); + __ profile_switch_case(T3, FSR, T2); + + // continue execution + __ bind(continue_execution); + __ swap(A7); + __ daddu(BCP, BCP, A7); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos); +} + +// used registers : T0, T1, T2, T3, A7, Rnext +// T2 : pairs address(array) +// Rnext : dest bytecode +// the data after the opcode is the same as lookupswitch +// see Rewriter::rewrite_method for more information +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int n) { + // // Binary search according to "Methodik des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. + // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (inexisting) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // register allocation + const Register array = T2; + const Register i = T3, j = A7; + const Register h = T1; + const Register temp = T0; + const Register key = FSR; + + // setup array + __ daddiu(array, BCP, 3*BytesPerInt); + __ li(AT, -BytesPerInt); + __ andr(array, array, AT); + + // initialize i & j + __ move(i, R0); + __ lw(j, array, - 1 * BytesPerInt); + // Convert j into native byteordering + __ swap(j); + + // and start + Label entry; + __ b(entry); + __ delayed()->nop(); + + // binary search loop + { + Label loop; + __ bind(loop); + // int h = (i + j) >> 1; + __ daddu(h, i, j); + __ dsrl(h, h, 1); + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // Convert array[h].match to native byte-ordering before compare + __ dsll(AT, h, Address::times_8); + __ daddu(AT, array, AT); + __ lw(temp, AT, 0 * BytesPerInt); + __ swap(temp); + + { + Label set_i, end_of_if; + __ slt(AT, key, temp); + __ beq(AT, R0, set_i); + __ delayed()->nop(); + + __ b(end_of_if); + __ delayed(); __ move(j, h); + + __ bind(set_i); + __ move(i, h); + + __ bind(end_of_if); + } + // while (i+1 < j) + __ bind(entry); + __ daddiu(h, i, 1); + __ slt(AT, h, j); + __ bne(AT, R0, loop); + __ delayed()->nop(); + } + + // end of binary search, result index is i (must check again!) 
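+  // Note on the re-check that follows: the loop invariant only guarantees
+  // array[i].match <= key < array[i+1].match (or that key is below every match
+  // value), so array[i].match must still be compared against key to tell a real
+  // hit from the default case. Sketch of the tail of the search (illustrative
+  // only, mirroring the assembly below):
+  //
+  //   if (key == array[i].fast_match())  use the matched entry's offset;  // entry found
+  //   else                               use the default offset;          // default_case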
+ Label default_case; + // Convert array[i].match to native byte-ordering before compare + __ dsll(AT, i, Address::times_8); + __ daddu(AT, array, AT); + __ lw(temp, AT, 0 * BytesPerInt); + __ swap(temp); + __ bne(key, temp, default_case); + __ delayed()->nop(); + + // entry found -> j = offset + __ dsll(AT, i, Address::times_8); + __ daddu(AT, array, AT); + __ lw(j, AT, 1 * BytesPerInt); + __ profile_switch_case(i, key, array); + __ swap(j); + + __ daddu(BCP, BCP, j); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos); + + // default case -> j = default offset + __ bind(default_case); + __ profile_switch_default(i); + __ lw(j, array, - 2 * BytesPerInt); + __ swap(j); + __ daddu(BCP, BCP, j); + __ lbu(Rnext, BCP, 0); + __ dispatch_only(vtos); +} + +void TemplateTable::_return(TosState state) { + transition(state, state); + assert(_desc->calls_vm(), + "inconsistent calls_vm information"); // call in remove_activation + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + __ ld(T1, aaddress(0)); + __ load_klass(LVP, T1); + __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); + __ move(AT, JVM_ACC_HAS_FINALIZER); + __ andr(AT, AT, LVP); + Label skip_register_finalizer; + __ beq(AT, R0, skip_register_finalizer); + __ delayed()->nop(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::register_finalizer), T1); + __ bind(skip_register_finalizer); + } + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. + if (state == itos) { + __ narrow(FSR); + } + + __ remove_activation(state, T9); + __ sync(); + + __ jr(T9); + __ delayed()->nop(); +} + +// ---------------------------------------------------------------------------- +// Volatile variables demand their effects be made known to all CPU's +// in order. Store buffers on most chips allow reads & writes to +// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode +// without some kind of memory barrier (i.e., it's not sufficient that +// the interpreter does not reorder volatile references, the hardware +// also must not reorder them). +// +// According to the new Java Memory Model (JMM): +// (1) All volatiles are serialized wrt to each other. ALSO reads & +// writes act as aquire & release, so: +// (2) A read cannot let unrelated NON-volatile memory refs that +// happen after the read float up to before the read. It's OK for +// non-volatile memory refs that happen before the volatile read to +// float down below it. +// (3) Similar a volatile write cannot let unrelated NON-volatile +// memory refs that happen BEFORE the write float down to after the +// write. It's OK for non-volatile memory refs that happen after the +// volatile write to float up before it. +// +// We only put in barriers around volatile refs (they are expensive), +// not _between_ memory refs (that would require us to track the +// flavor of the previous memory refs). Requirements (2) and (3) +// require some barriers before volatile stores and after volatile +// loads. These nearly cover requirement (1) but miss the +// volatile-store-volatile-load case. This final case is placed after +// volatile-stores although it could just as well go before +// volatile-loads. 
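+//
+// As a sketch (not generated code) of how the field accessors below use
+// volatile_barrier(): for a field flagged volatile, getfield/getstatic and
+// putfield/putstatic each emit
+//
+//   volatile_barrier();  <access the field>;  volatile_barrier();
+//
+// i.e. a full sync on both sides of the access, which conservatively covers
+// requirements (1) through (3) above, including the volatile-store /
+// volatile-load case.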
+void TemplateTable::volatile_barrier() { + if(os::is_MP()) __ sync(); +} + +// we dont shift left 2 bits in get_cache_and_index_at_bcp +// for we always need shift the index we use it. the ConstantPoolCacheEntry +// is 16-byte long, index is the index in +// ConstantPoolCache, so cache + base_offset() + index * 16 is +// the corresponding ConstantPoolCacheEntry +// used registers : T2 +// NOTE : the returned index need also shift left 4 to get the address! +void TemplateTable::resolve_cache_and_index(int byte_no, + Register Rcache, + Register index, + size_t index_size) { + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + const Register temp = A1; + assert_different_registers(Rcache, index); + + Label resolved; + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); + // is resolved? + int i = (int)bytecode(); + __ addiu(temp, temp, -i); + __ beq(temp, R0, resolved); + __ delayed()->nop(); + // resolve first time through + address entry; + switch (bytecode()) { + case Bytecodes::_getstatic : // fall through + case Bytecodes::_putstatic : // fall through + case Bytecodes::_getfield : // fall through + case Bytecodes::_putfield : + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); + break; + case Bytecodes::_invokevirtual : // fall through + case Bytecodes::_invokespecial : // fall through + case Bytecodes::_invokestatic : // fall through + case Bytecodes::_invokeinterface: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); + break; + case Bytecodes::_invokehandle: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); + break; + case Bytecodes::_invokedynamic: + entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); + break; + default : + fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode()))); + break; + } + + __ move(temp, i); + __ call_VM(NOREG, entry, temp); + + // Update registers with resolved info + __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); + __ bind(resolved); +} + +// The Rcache and index registers must be set before call +void TemplateTable::load_field_cp_cache_entry(Register obj, + Register cache, + Register index, + Register off, + Register flags, + bool is_static = false) { + assert_different_registers(cache, index, flags, off); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + // Field offset + __ dsll(AT, index, Address::times_ptr); + __ daddu(AT, cache, AT); + __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); + // Flags + __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); + + // klass overwrite register + if (is_static) { + __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ld(obj, Address(obj, mirror_offset)); + + __ verify_oop(obj); + } +} + +// get the method, itable_index and flags of the current invoke +void TemplateTable::load_invoke_cp_cache_entry(int byte_no, + Register method, + Register itable_index, + Register flags, + bool is_invokevirtual, + bool is_invokevfinal, /*unused*/ + bool is_invokedynamic) { + // setup registers + const Register cache = T3; + const Register index = T1; + assert_different_registers(method, flags); + assert_different_registers(method, cache, index); + assert_different_registers(itable_index, flags); + assert_different_registers(itable_index, cache, index); + 
assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); + // determine constant pool cache field offsets + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + ((byte_no == f2_byte) + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + // access constant pool cache fields + const int index_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()); + + size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + + //assert(wordSize == 8, "adjust code below"); + // note we shift 4 not 2, for we get is the true inde + // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version + __ dsll(AT, index, Address::times_ptr); + __ daddu(AT, cache, AT); + __ ld(method, AT, method_offset); + + if (itable_index != NOREG) { + __ ld(itable_index, AT, index_offset); + } + __ ld(flags, AT, flags_offset); +} + +// The registers cache and index expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, Register index, + bool is_static, bool has_tos) { + // do the JVMTI work here to avoid disturbing the register state below + // We use c_rarg registers here because we want to use the register used in + // the call to the VM + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. + Label L1; + // kill FSR + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + assert_different_registers(cache, index, AT); + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ lw(AT, AT, 0); + __ beq(AT, R0, L1); + __ delayed()->nop(); + + __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); + + // cache entry pointer + __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); + __ shl(tmp3, LogBytesPerWord); + __ daddu(tmp2, tmp2, tmp3); + if (is_static) { + __ move(tmp1, R0); + } else { + __ ld(tmp1, SP, 0); + __ verify_oop(tmp1); + } + // tmp1: object pointer or NULL + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::pop_and_check_object(Register r) { + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. 
+ __ verify_oop(r); +} + +// used registers : T1, T2, T3, T1 +// T1 : flags +// T2 : off +// T3 : obj +// T1 : field address +// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the +// following mapping to the TosState states: +// btos: 0 +// ctos: 1 +// stos: 2 +// itos: 3 +// ltos: 4 +// ftos: 5 +// dtos: 6 +// atos: 7 +// vtos: 8 +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::getfield_or_static(int byte_no, bool is_static) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + { + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + if (!is_static) pop_and_check_object(obj); + __ daddu(index, obj, off); + + + Label Done, notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + __ delayed()->nop(); + + // btos + __ lb(FSR, index, 0); + __ push(btos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + + __ bind(notByte); + __ move(AT, ztos); + __ bne(flags, AT, notBool); + __ delayed()->nop(); + + // ztos + __ lb(FSR, index, 0); + __ push(ztos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + + __ bind(notBool); + __ move(AT, itos); + __ bne(flags, AT, notInt); + __ delayed()->nop(); + + // itos + __ lw(FSR, index, 0); + __ push(itos); + + // Rewrite bytecode to be faster + if (!is_static) { + patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notInt); + __ move(AT, atos); + __ bne(flags, AT, notObj); + __ delayed()->nop(); + + // atos + //add for compressedoops + __ load_heap_oop(FSR, Address(index, 0)); + __ push(atos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notObj); + __ move(AT, ctos); + __ bne(flags, AT, notChar); + __ delayed()->nop(); + + // ctos + __ lhu(FSR, index, 0); + __ push(ctos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notChar); + __ move(AT, stos); + __ bne(flags, AT, notShort); + __ delayed()->nop(); + + // stos + __ lh(FSR, index, 0); + __ push(stos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notShort); + __ move(AT, ltos); + __ bne(flags, AT, notLong); + __ delayed()->nop(); + + // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 + // ltos + __ ld(FSR, index, 0 * wordSize); + __ push(ltos); + + // Don't rewrite to _fast_lgetfield for potential volatile case. 
+ __ b(Done); + __ delayed()->nop(); + + __ bind(notLong); + __ move(AT, ftos); + __ bne(flags, AT, notFloat); + __ delayed()->nop(); + + // ftos + __ lwc1(FSF, index, 0); + __ push(ftos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); + } + __ b(Done); + __ delayed()->nop(); + + __ bind(notFloat); + __ move(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); + __ delayed()->nop(); +#endif + + // dtos + __ ldc1(FSF, index, 0 * wordSize); + __ push(dtos); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); + } + + +#ifdef ASSERT + __ b(Done); + __ delayed()->nop(); + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + + +void TemplateTable::getfield(int byte_no) { + getfield_or_static(byte_no, false); +} + +void TemplateTable::getstatic(int byte_no) { + getfield_or_static(byte_no, true); +} + +// The registers cache and index expected to be set before call. +// The function may destroy various registers, just not the cache and index registers. +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { + transition(vtos, vtos); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L1; + //kill AT, T1, T2, T3, T9 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T9; + assert_different_registers(cache, index, tmp4); + + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ lw(AT, AT, 0); + __ beq(AT, R0, L1); + __ delayed()->nop(); + + __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); + + if (is_static) { + __ move(tmp1, R0); + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though; it + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. 
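+      // Illustration (not generated code) of the layout being decoded below:
+      // for a two-word value such as a long or double the expression stack is
+      //
+      //   SP -> [ value (2 slots) ][ objectref ] ...
+      //
+      // so the object sits at SP + Interpreter::expr_offset_in_bytes(2); for a
+      // one-word value it sits at SP + Interpreter::expr_offset_in_bytes(1).
+      // That is what the two_word / valsize_known paths compute into tmp1.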
+ Label two_word, valsize_known; + __ dsll(AT, tmp4, Address::times_8); + __ daddu(AT, tmp2, AT); + __ ld(tmp3, AT, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset())); + __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); + + ConstantPoolCacheEntry::verify_tos_state_shift(); + __ move(tmp1, SP); + __ move(AT, ltos); + __ beq(tmp3, AT, two_word); + __ delayed()->nop(); + __ move(AT, dtos); + __ beq(tmp3, AT, two_word); + __ delayed()->nop(); + __ b(valsize_known); + __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); + + __ bind(two_word); + __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); + + __ bind(valsize_known); + // setup object pointer + __ ld(tmp1, tmp1, 0*wordSize); + } + // cache entry pointer + __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); + __ shl(tmp4, LogBytesPerWord); + __ daddu(tmp2, tmp2, tmp4); + // object (tos) + __ move(tmp3, SP); + // tmp1: object pointer set up above (NULL if static) + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +// used registers : T0, T1, T2, T3, T8 +// T1 : flags +// T2 : off +// T3 : obj +// T8 : volatile bit +// see ConstantPoolCacheEntry::set_field for more info +void TemplateTable::putfield_or_static(int byte_no, bool is_static) { + transition(vtos, vtos); + + const Register cache = T3; + const Register index = T0; + const Register obj = T3; + const Register off = T2; + const Register flags = T1; + const Register bc = T3; + + const Register scratch = T8; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + Label Done; + { + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, flags); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + + Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; + + assert(btos == 0, "change code, btos != 0"); + + // btos + __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); + __ bne(flags, R0, notByte); + __ delayed()->nop(); + + __ pop(btos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sb(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ztos + __ bind(notByte); + __ move(AT, ztos); + __ bne(flags, AT, notBool); + __ delayed()->nop(); + + __ pop(ztos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ andi(FSR, FSR, 0x1); + __ sb(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // itos + __ bind(notBool); + __ move(AT, itos); + __ bne(flags, AT, notInt); + __ delayed()->nop(); + + __ pop(itos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sw(FSR, AT, 0); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // atos + __ bind(notInt); + __ move(AT, atos); + __ bne(flags, AT, notObj); + __ 
delayed()->nop(); + + __ pop(atos); + if (!is_static) { + pop_and_check_object(obj); + } + + do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR, _bs->kind(), false); + + if (!is_static) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ctos + __ bind(notObj); + __ move(AT, ctos); + __ bne(flags, AT, notChar); + __ delayed()->nop(); + + __ pop(ctos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sh(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // stos + __ bind(notChar); + __ move(AT, stos); + __ bne(flags, AT, notShort); + __ delayed()->nop(); + + __ pop(stos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sh(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ltos + __ bind(notShort); + __ move(AT, ltos); + __ bne(flags, AT, notLong); + __ delayed()->nop(); + + __ pop(ltos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sd(FSR, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + // ftos + __ bind(notLong); + __ move(AT, ftos); + __ bne(flags, AT, notFloat); + __ delayed()->nop(); + + __ pop(ftos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ swc1(FSF, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); + } + __ b(Done); + __ delayed()->nop(); + + + // dtos + __ bind(notFloat); + __ move(AT, dtos); +#ifdef ASSERT + __ bne(flags, AT, notDouble); + __ delayed()->nop(); +#endif + + __ pop(dtos); + if (!is_static) { + pop_and_check_object(obj); + } + __ daddu(AT, obj, off); + __ sdc1(FSF, AT, 0); + if (!is_static) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); + } + +#ifdef ASSERT + __ b(Done); + __ delayed()->nop(); + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + +void TemplateTable::putfield(int byte_no) { + putfield_or_static(byte_no, false); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +// used registers : T1, T2, T3 +// T1 : cp_entry +// T2 : obj +// T3 : value pointer +void TemplateTable::jvmti_post_fast_field_mod() { + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. 
+ Label L2; + //kill AT, T1, T2, T3, T9 + Register tmp1 = T2; + Register tmp2 = T1; + Register tmp3 = T3; + Register tmp4 = T9; + __ li(AT, JvmtiExport::get_field_modification_count_addr()); + __ lw(tmp3, AT, 0); + __ beq(tmp3, R0, L2); + __ delayed()->nop(); + __ pop_ptr(tmp1); + __ verify_oop(tmp1); + __ push_ptr(tmp1); + switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ push_i(FSR); break; + case Bytecodes::_fast_dputfield: __ push_d(FSF); break; + case Bytecodes::_fast_fputfield: __ push_f(); break; + case Bytecodes::_fast_lputfield: __ push_l(FSR); break; + default: ShouldNotReachHere(); + } + __ move(tmp3, SP); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); + __ verify_oop(tmp1); + // tmp1: object pointer copied above + // tmp2: cache entry pointer + // tmp3: jvalue object on the stack + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + tmp1, tmp2, tmp3); + + switch (bytecode()) { // restore tos values + case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; + case Bytecodes::_fast_dputfield: __ pop_d(); break; + case Bytecodes::_fast_fputfield: __ pop_f(); break; + case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; + } + __ bind(L2); + } +} + +// used registers : T2, T3, T1 +// T2 : index & off & field address +// T3 : cache & obj +// T1 : flags +void TemplateTable::fast_storefield(TosState state) { + transition(state, vtos); + + const Register scratch = T8; + + ByteSize base = ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ sync(); + + // test for volatile with T1 + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); + + // replace index with field offset from cache entry + __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); + + Label Done; + { + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, T1); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + // Get object from stack + pop_and_check_object(T3); + + if (bytecode() != Bytecodes::_fast_aputfield) { + // field address + __ daddu(T2, T3, T2); + } + + // access field + switch (bytecode()) { + case Bytecodes::_fast_zputfield: + __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 + // fall through to bputfield + case Bytecodes::_fast_bputfield: + __ sb(FSR, T2, 0); + break; + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: + __ sh(FSR, T2, 0); + break; + case Bytecodes::_fast_iputfield: + __ sw(FSR, T2, 0); + break; + case Bytecodes::_fast_lputfield: + __ sd(FSR, T2, 0 * wordSize); + break; + case Bytecodes::_fast_fputfield: + __ 
swc1(FSF, T2, 0); + break; + case Bytecodes::_fast_dputfield: + __ sdc1(FSF, T2, 0 * wordSize); + break; + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR, _bs->kind(), false); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + +// used registers : T2, T3, T1 +// T3 : cp_entry & cache +// T2 : index & offset +void TemplateTable::fast_accessfield(TosState state) { + transition(atos, state); + + const Register scratch = T8; + + // do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we take + // the time to call into the VM. + Label L1; + __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); + __ lw(T3, AT, 0); + __ beq(T3, R0, L1); + __ delayed()->nop(); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(T3, T1, 1); + __ move(TSR, FSR); + __ verify_oop(FSR); + // FSR: object pointer copied above + // T3: cache entry pointer + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), + FSR, T3); + __ move(FSR, TSR); + __ bind(L1); + } + + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 1); + + // Must prevent reordering of the following cp cache loads with bytecode load + __ sync(); + + // replace index with field offset from cache entry + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); + + { + __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + // FSR: object + __ verify_oop(FSR); + __ null_check(FSR); + // field addresses + __ daddu(FSR, FSR, T2); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_bgetfield: + __ lb(FSR, FSR, 0); + break; + case Bytecodes::_fast_sgetfield: + __ lh(FSR, FSR, 0); + break; + case Bytecodes::_fast_cgetfield: + __ lhu(FSR, FSR, 0); + break; + case Bytecodes::_fast_igetfield: + __ lw(FSR, FSR, 0); + break; + case Bytecodes::_fast_lgetfield: + __ stop("should not be rewritten"); + break; + case Bytecodes::_fast_fgetfield: + __ lwc1(FSF, FSR, 0); + break; + case Bytecodes::_fast_dgetfield: + __ ldc1(FSF, FSR, 0); + break; + case Bytecodes::_fast_agetfield: + //add for compressedoops + __ load_heap_oop(FSR, Address(FSR, 0)); + __ verify_oop(FSR); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + +// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 +// used registers : T1, T2, T3, T1 +// T1 : obj & field address +// T2 : off +// T3 : cache +// T1 : index +void TemplateTable::fast_xaccess(TosState state) { + transition(vtos, state); + + const Register scratch = T8; + + // get receiver + __ ld(T1, aaddress(0)); + // access constant pool cache + __ get_cache_and_index_at_bcp(T3, T2, 2); + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + 
ConstantPoolCacheEntry::f2_offset())); + + { + __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); + __ andr(scratch, scratch, AT); + + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } + + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ daddiu(BCP, BCP, 1); + __ null_check(T1); + __ daddu(T1, T1, T2); + + if (state == itos) { + __ lw(FSR, T1, 0); + } else if (state == atos) { + __ load_heap_oop(FSR, Address(T1, 0)); + __ verify_oop(FSR); + } else if (state == ftos) { + __ lwc1(FSF, T1, 0); + } else { + ShouldNotReachHere(); + } + __ daddiu(BCP, BCP, -1); + + { + Label notVolatile; + __ beq(scratch, R0, notVolatile); + __ delayed()->nop(); + volatile_barrier(); + __ bind(notVolatile); + } +} + + + +//----------------------------------------------------------------------------- +// Calls + +void TemplateTable::count_calls(Register method, Register temp) { + // implemented elsewhere + ShouldNotReachHere(); +} + +// method, index, recv, flags: T1, T2, T3, T1 +// byte_no = 2 for _invokevirtual, 1 else +// T0 : return address +// get the method & index of the invoke, and push the return address of +// the invoke(first word in the frame) +// this address is where the return code jmp to. +// NOTE : this method will set T3&T1 as recv&flags +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. + Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) { + // determine flags + const Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + const bool save_flags = (flags != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); + assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); + assert(flags == noreg || flags == T1, "error flags reg."); + assert(recv == noreg || recv == T3, "error recv reg."); + + // setup registers & access constant pool cache + if(recv == noreg) recv = T3; + if(flags == noreg) flags = T1; + assert_different_registers(method, index, recv, flags); + + // save 'interpreter return address' + __ save_bcp(); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); + __ andr(AT, AT, flags); + __ beq(AT, R0, L_no_push); + __ delayed()->nop(); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. + Register tmp = SSR; + __ push(tmp); + __ move(tmp, index); + assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, tmp); + __ pop(tmp); + __ push(index); // push appendix (MethodType, CallSite, etc.) 
+ __ bind(L_no_push); + } + + // load receiver if needed (after appendix is pushed so parameter size is correct) + // Note: no return address pushed yet + if (load_receiver) { + __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); + __ andr(recv, flags, AT); + // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. + const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address + const int receiver_is_at_end = -1; // back off one slot to get receiver + Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); + __ ld(recv, recv_addr); + __ verify_oop(recv); + } + if(save_flags) { + __ move(BCP, flags); + } + + // compute return type + __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); + __ andi(flags, flags, 0xf); + + // Make sure we don't need to mask flags for tos_state_shift after the above shift + ConstantPoolCacheEntry::verify_tos_state_shift(); + // load return address + { + const address table = (address) Interpreter::invoke_return_entry_table_for(code); + __ li(AT, (long)table); + __ dsll(flags, flags, LogBytesPerWord); + __ daddu(AT, AT, flags); + __ ld(RA, AT, 0); + } + + if (save_flags) { + __ move(flags, BCP); + __ restore_bcp(); + } +} + +// used registers : T0, T3, T1, T2 +// T3 : recv, this two register using convention is by prepare_invoke +// T1 : flags, klass +// Rmethod : method, index must be Rmethod +void TemplateTable::invokevirtual_helper(Register index, + Register recv, + Register flags) { + + assert_different_registers(index, recv, flags, T2); + + // Test for an invoke of a final method + Label notFinal; + __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); + __ andr(AT, flags, AT); + __ beq(AT, R0, notFinal); + __ delayed()->nop(); + + Register method = index; // method must be Rmethod + assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // the index is indeed methodOop, for this is vfinal, + // see ConstantPoolCacheEntry::set_method for more info + + __ verify_oop(method); + + // It's final, need a null check here! 
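+  // On the non-final path below the receiver is null-checked implicitly by the
+  // klass load (null_check(recv, oopDesc::klass_offset_in_bytes()) followed by
+  // load_klass); the final-method path skips that load, so the check is done
+  // explicitly here.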
+ __ null_check(recv); + + // profile this call + __ profile_final_call(T2); + + // T2: tmp, used for mdp + // method: callee + // T9: tmp + // is_virtual: true + __ profile_arguments_type(T2, method, T9, true); + + __ jump_from_interpreted(method, T2); + + __ bind(notFinal); + + // get receiver klass + __ null_check(recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(T2, recv); + __ verify_oop(T2); + + // profile this call + __ profile_virtual_call(T2, T0, T1); + + // get target methodOop & entry point + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + __ dsll(AT, index, Address::times_ptr); + // T2: receiver + __ daddu(AT, T2, AT); + //this is a ualign read + __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); + __ profile_arguments_type(T2, method, T9, true); + __ jump_from_interpreted(method, T2); + +} + +void TemplateTable::invokevirtual(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); + // now recv & flags in T3, T1 + invokevirtual_helper(Rmethod, T3, T1); +} + +// T9 : entry +// Rmethod : method +void TemplateTable::invokespecial(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG, T3); + // now recv & flags in T3, T1 + __ verify_oop(T3); + __ null_check(T3); + __ profile_call(T9); + + // T8: tmp, used for mdp + // Rmethod: callee + // T9: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T9, false); + + __ jump_from_interpreted(Rmethod, T9); + __ move(T0, T3); +} + +void TemplateTable::invokestatic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, Rmethod, NOREG); + __ verify_oop(Rmethod); + + __ profile_call(T9); + + // T8: tmp, used for mdp + // Rmethod: callee + // T9: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T9, false); + + __ jump_from_interpreted(Rmethod, T9); +} + +// i have no idea what to do here, now. for future change. FIXME. +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + __ stop("fast_invokevfinal not used on mips64"); +} + +// used registers : T0, T1, T2, T3, T1, A7 +// T0 : itable, vtable, entry +// T1 : interface +// T3 : receiver +// T1 : flags, klass +// Rmethod : index, method, this is required by interpreter_entry +void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); + //this method will use T1-T4 and T0 + assert(byte_no == f1_byte, "use this argument"); + prepare_invoke(byte_no, T2, Rmethod, T3, T1); + // T2: reference klass + // Rmethod: method + // T3: receiver + // T1: flags + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCacheOop.cpp for details. + // This code isn't produced by javac, but could be produced by + // another compliant java compiler. + Label notMethod; + __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); + __ andr(AT, T1, AT); + __ beq(AT, R0, notMethod); + __ delayed()->nop(); + + invokevirtual_helper(Rmethod, T3, T1); + __ bind(notMethod); + // Get receiver klass into T1 - also a null check + //add for compressedoops + __ load_klass(T1, T3); + __ verify_oop(T1); + + Label no_such_interface, no_such_method; + + // Receiver subtype check against REFC. 
+ // Superklass in T2. Subklass in T1. + __ lookup_interface_method(// inputs: rec. class, interface, itable index + T1, T2, noreg, + // outputs: scan temp. reg, scan temp. reg + T0, FSR, + no_such_interface, + /*return_method=*/false); + + + // profile this call + __ profile_virtual_call(T1, T0, FSR); + + // Get declaring interface class from method, and itable index + __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); + __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); + __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); + __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); + __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); + __ subu32(Rmethod, R0, Rmethod); + + __ lookup_interface_method(// inputs: rec. class, interface, itable index + T1, T2, Rmethod, + // outputs: method, scan temp. reg + Rmethod, T0, + no_such_interface); + + // Rmethod: Method* to call + // T3: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ beq(Rmethod, R0, no_such_method); + __ delayed()->nop(); + + __ profile_arguments_type(T1, Rmethod, T0, true); + + // do the call + // T3: receiver + // Rmethod: Method* + __ jump_from_interpreted(Rmethod, T1); + __ should_not_reach_here(); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + __ bind(no_such_interface); + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + +} + + +void TemplateTable::invokehandle(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + const Register T2_method = Rmethod; + const Register FSR_mtype = FSR; + const Register T3_recv = T3; + + if (!EnableInvokeDynamic) { + // rewriter does not generate this bytecode + __ should_not_reach_here(); + return; + } + + prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); + //??__ verify_method_ptr(T2_method); + __ verify_oop(T3_recv); + __ null_check(T3_recv); + + // T9: MethodType object (from cpool->resolved_references[f1], if necessary) + // T2_method: MH.invokeExact_MT method (from f2) + + // Note: T9 is already pushed (if necessary) by prepare_invoke + + // FIXME: profile the LambdaForm also + __ profile_final_call(T9); + + // T8: tmp, used for mdp + // T2_method: callee + // T9: tmp + // is_virtual: true + __ profile_arguments_type(T8, T2_method, T9, true); + + __ jump_from_interpreted(T2_method, T9); +} + + void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + if (!EnableInvokeDynamic) { + // We should not encounter this bytecode if !EnableInvokeDynamic. + // The verifier will stop it. However, if we get past the verifier, + // this will stop the thread in a reasonable way, without crashing the JVM. 
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeError)); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + return; + } + + //const Register Rmethod = T2; + const Register T2_callsite = T2; + + prepare_invoke(byte_no, Rmethod, T2_callsite); + + // T2: CallSite object (from cpool->resolved_references[f1]) + // Rmethod: MH.linkToCallSite method (from f2) + + // Note: T2_callsite is already pushed by prepare_invoke + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(T9); + + // T8: tmp, used for mdp + // Rmethod: callee + // T9: tmp + // is_virtual: false + __ profile_arguments_type(T8, Rmethod, T9, false); + + __ verify_oop(T2_callsite); + + __ jump_from_interpreted(Rmethod, T9); + } + +//----------------------------------------------------------------------------- +// Allocation +// T1 : tags & buffer end & thread +// T2 : object end +// T3 : klass +// T1 : object size +// A1 : cpool +// A2 : cp index +// return object in FSR +void TemplateTable::_new() { + transition(vtos, atos); + __ get_unsigned_2_byte_index_at_bcp(A2, 1); + + Label slow_case; + Label done; + Label initialize_header; + Label initialize_object; // including clearing the fields + Label allocate_shared; + + // get InstanceKlass in T3 + __ get_cpool_and_tags(A1, T1); + + __ dsll(AT, A2, Address::times_8); + if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { + __ gsldx(T3, A1, AT, sizeof(ConstantPool)); + } else { + __ daddu(AT, A1, AT); + __ ld(T3, AT, sizeof(ConstantPool)); + } + + // make sure the class we're about to instantiate has been resolved. + // Note: slow_case does a pop of stack, which is why we loaded class/pushed above + const int tags_offset = Array::base_offset_in_bytes(); + if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { + __ gslbx(AT, T1, A2, tags_offset); + } else { + __ daddu(T1, T1, A2); + __ lb(AT, T1, tags_offset); + } + if(os::is_MP()) { + __ sync(); // load acquire + } + __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + + // make sure klass is initialized & doesn't have finalizer + // make sure klass is fully initialized + __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); + __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + // has_finalizer + __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); + __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + // Allocate the instance + // 1) Try to allocate in the TLAB + // 2) if fail and the object is large allocate in the shared Eden + // 3) if the above fails (or is not applicable), go to a slow case + // (creates a new TLAB, etc.) + + const bool allow_shared_alloc = + Universe::heap()->supports_inline_contig_alloc() && !CMSIncrementalMode; + +#ifndef OPT_THREAD + const Register thread = T8; + if (UseTLAB || allow_shared_alloc) { + __ get_thread(thread); + } +#else + const Register thread = TREG; +#endif + + if (UseTLAB) { + // get tlab_top + __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); + // get tlab_end + __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); + __ daddu(T2, FSR, T0); + __ slt(AT, AT, T2); + __ bne(AT, R0, allow_shared_alloc ? 
allocate_shared : slow_case); + __ delayed()->nop(); + __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); + + if (ZeroTLAB) { + // the fields have been already cleared + __ beq(R0, R0, initialize_header); + } else { + // initialize both the header and fields + __ beq(R0, R0, initialize_object); + } + __ delayed()->nop(); + } + + // Allocation in the shared Eden , if allowed + // T0 : instance size in words + if(allow_shared_alloc){ + __ bind(allocate_shared); + + Label retry; + Address heap_top(T1); + __ set64(T1, (long)Universe::heap()->top_addr()); + __ ld(FSR, heap_top); + + __ bind(retry); + __ set64(AT, (long)Universe::heap()->end_addr()); + __ ld(AT, AT, 0); + __ daddu(T2, FSR, T0); + __ slt(AT, AT, T2); + __ bne(AT, R0, slow_case); + __ delayed()->nop(); + + // Compare FSR with the top addr, and if still equal, store the new + // top addr in T2 at the address of the top addr pointer. Sets AT if was + // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. + // + // FSR: object begin + // T2: object end + // T0: instance size in words + + // if someone beat us on the allocation, try again, otherwise continue + __ cmpxchg(T2, heap_top, FSR); + __ beq(AT, R0, retry); + __ delayed()->nop(); + + __ incr_allocated_bytes(thread, T0, 0); + } + + if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. + __ bind(initialize_object); + __ set64(AT, - sizeof(oopDesc)); + __ daddu(T0, T0, AT); + __ beq(T0, R0, initialize_header); + __ delayed()->nop(); + + // initialize remaining object fields: T0 is a multiple of 2 + { + Label loop; + __ daddu(T1, FSR, T0); + __ daddiu(T1, T1, -oopSize); + + __ bind(loop); + __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); + __ bne(T1, FSR, loop); //dont clear header + __ delayed()->daddiu(T1, T1, -oopSize); + } + + //klass in T3, + // initialize object header only. 
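+  // Illustration (assuming compressed class pointers) of the header written here:
+  //
+  //   [ mark word: prototype, possibly biased ][ narrow klass ][ klass gap = 0 ]
+  //
+  // store_klass_gap() zeroes the 32-bit gap next to the narrow klass field and
+  // store_klass() fills in the klass; with uncompressed class pointers the klass
+  // occupies the full word and there is no gap.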
+ __ bind(initialize_header); + if (UseBiasedLocking) { + __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); + __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); + } else { + __ set64(AT, (long)markOopDesc::prototype()); + __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); + } + + __ store_klass_gap(FSR, R0); + __ store_klass(FSR, T3); + + { + SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); + // Trigger dtrace event for fastpath + __ push(atos); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); + __ pop(atos); + + } + __ b(done); + __ delayed()->nop(); + } + + // slow case + __ bind(slow_case); + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); + + // continue + __ bind(done); + __ sync(); +} + +void TemplateTable::newarray() { + transition(itos, atos); + __ lbu(A1, at_bcp(1)); + //type, count + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); + __ sync(); +} + +void TemplateTable::anewarray() { + transition(itos, atos); + __ get_2_byte_integer_at_bcp(A2, AT, 1); + __ huswap(A2); + __ get_constant_pool(A1); + // cp, index, count + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); + __ sync(); +} + +void TemplateTable::arraylength() { + transition(atos, itos); + __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); + __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); +} + +// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) +// T2 : sub klass +// T3 : cpool +// T3 : super klass +void TemplateTable::checkcast() { + transition(atos, atos); + Label done, is_null, ok_is_subtype, quicked, resolved; + __ beq(FSR, R0, is_null); + __ delayed()->nop(); + + // Get cpool & tags index + __ get_cpool_and_tags(T3, T1); + __ get_2_byte_integer_at_bcp(T2, AT, 1); + __ huswap(T2); + + // See if bytecode has already been quicked + __ daddu(AT, T1, T2); + __ lb(AT, AT, Array::base_offset_in_bytes()); + if(os::is_MP()) { + __ sync(); // load acquire + } + __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); + __ beq(AT, R0, quicked); + __ delayed()->nop(); + + // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. + // Then, GC will move the object in V0 to another places in heap. + // Therefore, We should never save such an object in register. + // Instead, we should save it in the stack. It can be modified automatically by the GC thread. + // After GC, the object address in FSR is changed to a new place. + // + __ push(atos); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(T3, thread); + __ pop_ptr(FSR); + __ b(resolved); + __ delayed()->nop(); + + // klass already in cp, get superklass in T3 + __ bind(quicked); + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T3, AT, sizeof(ConstantPool)); + + __ bind(resolved); + + // get subklass in T2 + //add for compressedoops + __ load_klass(T2, FSR); + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); + + // Come here on failure + // object is at FSR + __ jmp(Interpreter::_throw_ClassCastException_entry); + __ delayed()->nop(); + + // Come here on success + __ bind(ok_is_subtype); + + // Collect counts on whether this check-cast sees NULLs a lot or not. 
+ if (ProfileInterpreter) { + __ b(done); + __ delayed()->nop(); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); + } + __ bind(done); +} + +// i use T3 as cpool, T1 as tags, T2 as index +// object always in FSR, superklass in T3, subklass in T2 +void TemplateTable::instanceof() { + transition(atos, itos); + Label done, is_null, ok_is_subtype, quicked, resolved; + + __ beq(FSR, R0, is_null); + __ delayed()->nop(); + + // Get cpool & tags index + __ get_cpool_and_tags(T3, T1); + // get index + __ get_2_byte_integer_at_bcp(T2, AT, 1); + __ huswap(T2); + + // See if bytecode has already been quicked + // quicked + __ daddu(AT, T1, T2); + __ lb(AT, AT, Array::base_offset_in_bytes()); + if(os::is_MP()) { + __ sync(); // load acquire + } + __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); + __ beq(AT, R0, quicked); + __ delayed()->nop(); + + __ push(atos); + const Register thread = TREG; +#ifndef OPT_THREAD + __ get_thread(thread); +#endif + call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + __ get_vm_result_2(T3, thread); + __ pop_ptr(FSR); + __ b(resolved); + __ delayed()->nop(); + + // get superklass in T3, subklass in T2 + __ bind(quicked); + __ dsll(AT, T2, Address::times_8); + __ daddu(AT, T3, AT); + __ ld(T3, AT, sizeof(ConstantPool)); + + __ bind(resolved); + // get subklass in T2 + //add for compressedoops + __ load_klass(T2, FSR); + + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); + // Come here on failure + __ b(done); + __ delayed(); __ move(FSR, R0); + + // Come here on success + __ bind(ok_is_subtype); + __ move(FSR, 1); + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ beq(R0, R0, done); + __ delayed()->nop(); + __ bind(is_null); + __ profile_null_seen(T3); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); + // FSR = 0: obj == NULL or obj is not an instanceof the specified klass + // FSR = 1: obj != NULL and obj is an instanceof the specified klass +} + +//-------------------------------------------------------- +//-------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + // Note: We get here even if we are single stepping.. + // jbug inists on setting breakpoints at every bytecode + // even if we are in single step mode. + + transition(vtos, vtos); + + // get the unpatched byte code + __ get_method(A1); + __ call_VM(NOREG, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::get_original_bytecode_at), + A1, BCP); + __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal + + // post the breakpoint event + __ get_method(A1); + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP); + + // complete the execution of original bytecode + __ dispatch_only_normal(vtos); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + __ null_check(FSR); + __ jmp(Interpreter::throw_exception_entry()); + __ delayed()->nop(); +} + +//----------------------------------------------------------------------------- +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// [expressions ] <--- SP = expression stack top +// .. +// [expressions ] +// [monitor entry] <--- monitor block top = expression stack bot +// .. 
+// [monitor entry] +// [frame data ] <--- monitor block bot +// ... +// [return addr ] <--- FP + +// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer +// object always in FSR +void TemplateTable::monitorenter() { + transition(atos, vtos); + + // check for NULL object + __ null_check(FSR); + + const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset + * wordSize); + const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize); + Label allocated; + + // initialize entry pointer + __ move(c_rarg0, R0); + + // find a free slot in the monitor block (result in c_rarg0) + { + Label entry, loop, exit, next; + __ ld(T2, monitor_block_top); + __ b(entry); + __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + + // free slot? + __ bind(loop); + __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes()); + __ bne(AT, R0, next); + __ delayed()->nop(); + __ move(c_rarg0, T2); + + __ bind(next); + __ beq(FSR, AT, exit); + __ delayed()->nop(); + __ daddiu(T2, T2, entry_size); + + __ bind(entry); + __ bne(T3, T2, loop); + __ delayed()->nop(); + __ bind(exit); + } + + __ bne(c_rarg0, R0, allocated); + __ delayed()->nop(); + + // allocate one if there's no free slot + { + Label entry, loop; + // 1. compute new pointers // SP: old expression stack top + __ ld(c_rarg0, monitor_block_top); + __ daddiu(SP, SP, - entry_size); + __ daddiu(c_rarg0, c_rarg0, - entry_size); + __ sd(c_rarg0, monitor_block_top); + __ b(entry); + __ delayed(); __ move(T3, SP); + + // 2. move expression stack contents + __ bind(loop); + __ ld(AT, T3, entry_size); + __ sd(AT, T3, 0); + __ daddiu(T3, T3, wordSize); + __ bind(entry); + __ bne(T3, c_rarg0, loop); + __ delayed()->nop(); + } + + __ bind(allocated); + // Increment bcp to point to the next bytecode, + // so exception handling for async. exceptions work correctly. + // The object has already been poped from the stack, so the + // expression stack looks correct. + __ daddiu(BCP, BCP, 1); + __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ lock_object(c_rarg0); + // check to make sure this monitor doesn't cause stack overflow after locking + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + // The bcp has already been incremented. Just need to dispatch to next instruction. + + __ dispatch_next(vtos); +} + +// T2 : top +// c_rarg0 : entry +void TemplateTable::monitorexit() { + transition(atos, vtos); + + __ null_check(FSR); + + const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); + Label found; + + // find matching slot + { + Label entry, loop; + __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ b(entry); + __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); + + __ bind(loop); + __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); + __ beq(FSR, AT, found); + __ delayed()->nop(); + __ daddiu(c_rarg0, c_rarg0, entry_size); + __ bind(entry); + __ bne(T2, c_rarg0, loop); + __ delayed()->nop(); + } + + // error handling. 
Unlocking was not block-structured + Label end; + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + // call run-time routine + // c_rarg0: points to monitor entry + __ bind(found); + __ move(TSR, FSR); + __ unlock_object(c_rarg0); + __ move(FSR, TSR); + __ bind(end); +} + + +// Wide instructions +void TemplateTable::wide() { + transition(vtos, vtos); + __ lbu(Rnext, at_bcp(1)); + __ dsll(T9, Rnext, Address::times_8); + __ li(AT, (long)Interpreter::_wentry_point); + __ daddu(AT, T9, AT); + __ ld(T9, AT, 0); + __ jr(T9); + __ delayed()->nop(); +} + + +void TemplateTable::multianewarray() { + transition(vtos, atos); + // last dim is on top of stack; we want address of first one: + // first_addr = last_addr + (ndims - 1) * wordSize + __ lbu(A1, at_bcp(3)); // dimension + __ daddiu(A1, A1, -1); + __ dsll(A1, A1, Address::times_8); + __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); + __ lbu(AT, at_bcp(3)); + __ dsll(AT, AT, Address::times_8); + __ daddu(SP, SP, AT); + __ sync(); +} +#endif // !CC_INTERP diff --git a/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp b/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp new file mode 100644 index 00000000000..b63274a2064 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/templateTable_mips_64.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP + + static void prepare_invoke(int byte_no, + Register method, + Register index = noreg, + Register recv = noreg, + Register flags = noreg + ); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + //static void volatile_barrier(Assembler::Membar_mask_bits order_constraint); + static void volatile_barrier(); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + +#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP diff --git a/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp b/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp new file mode 100644 index 00000000000..6939914356d --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vmStructs_mips.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. 
All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* JavaCallWrapper */ \ + /******************************/ \ + /******************************/ \ + /* JavaFrameAnchor */ \ + /******************************/ \ + volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ + \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ + /* be present there) */ + + +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ + /* be present there) */ + + +#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ + /* be present there) */ + +#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ + /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ + /* be present there) */ + +#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp new file mode 100644 index 00000000000..a98f70d9ff1 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "memory/allocation.inline.hpp" +#include "vm_version_ext_mips.hpp" + +// VM_Version_Ext statics +int VM_Version_Ext::_no_of_threads = 0; +int VM_Version_Ext::_no_of_cores = 0; +int VM_Version_Ext::_no_of_sockets = 0; +bool VM_Version_Ext::_initialized = false; +char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; +char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; + +void VM_Version_Ext::initialize_cpu_information(void) { + // do nothing if cpu info has been initialized + if (_initialized) { + return; + } + + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; + if (is_loongson()) { + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); + } else { + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); + } + _initialized = true; +} + +int VM_Version_Ext::number_of_threads(void) { + initialize_cpu_information(); + return _no_of_threads; +} + +int VM_Version_Ext::number_of_cores(void) { + initialize_cpu_information(); + return _no_of_cores; +} + +int VM_Version_Ext::number_of_sockets(void) { + initialize_cpu_information(); + return _no_of_sockets; +} + +const char* VM_Version_Ext::cpu_name(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); + return tmp; +} + +const char* VM_Version_Ext::cpu_description(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); + return tmp; +} diff --git a/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp new file mode 100644 index 00000000000..a240fcc2e92 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vm_version_ext_mips.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP + +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" + +class VM_Version_Ext : public VM_Version { + private: + static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; + static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + + static int _no_of_threads; + static int _no_of_cores; + static int _no_of_sockets; + static bool _initialized; + static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; + static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + + public: + static int number_of_threads(void); + static int number_of_cores(void); + static int number_of_sockets(void); + + static const char* cpu_name(void); + static const char* cpu_description(void); + static void initialize_cpu_information(void); +}; + +#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/vm_version_mips.cpp b/hotspot/src/cpu/mips/vm/vm_version_mips.cpp new file mode 100644 index 00000000000..aef8f0746ae --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vm_version_mips.cpp @@ -0,0 +1,510 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/java.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "vm_version_mips.hpp" +#ifdef TARGET_OS_FAMILY_linux +# include "os_linux.inline.hpp" +#endif + +#define A0 RA0 + +int VM_Version::_cpuFeatures; +const char* VM_Version::_features_str = ""; +VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; +volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; +bool VM_Version::_is_cpucfg_instruction_supported = true; +bool VM_Version::_cpu_info_is_initialized = false; + +static BufferBlob* stub_blob; +static const int stub_size = 600; + +extern "C" { + typedef void (*get_cpu_info_stub_t)(void*); +} +static get_cpu_info_stub_t get_cpu_info_stub = NULL; + + +class VM_Version_StubGenerator: public StubCodeGenerator { + public: + + VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} + + address generate_get_cpu_info() { + assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); + StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); +# define __ _masm-> + + address start = __ pc(); + + __ enter(); + __ push(AT); + __ push(V0); + + __ li(AT, (long)0); + __ cpucfg(V0, AT); + __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); + __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); + + __ li(AT, 1); + __ cpucfg(V0, AT); + __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); + __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); + + __ li(AT, 2); + __ cpucfg(V0, AT); + __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); + __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); + + __ pop(V0); + __ pop(AT); + __ leave(); + __ jr(RA); + __ delayed()->nop(); +# undef __ + + return start; + }; +}; + +uint32_t VM_Version::get_feature_flags_by_cpucfg() { + uint32_t result = 0; + if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) + result |= CPU_MMI; + if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) + result |= CPU_MSA1_0; + if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) + result |= CPU_MSA2_0; + if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) + result |= CPU_CGP; + if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) + result |= CPU_LSX1; + if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) + result |= CPU_LSX2; + if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) + result |= CPU_LASX; + if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) + result |= CPU_LLSYNC; + if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) + result |= CPU_TGTSYNC; + if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) + result |= CPU_MUALP; + if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) + result |= CPU_LEXT1; + if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) + result |= CPU_LEXT2; + if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) + result |= CPU_LEXT3; + if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) + result |= CPU_LAMO; + if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) + result |= CPU_LPIXU; + + result |= CPU_ULSYNC; + + return result; +} + +void read_cpu_info(const char *path, char *result) { + FILE *ptr; + char buf[1024]; + int i = 0; + if((ptr=fopen(path, "r")) != NULL) { + while(fgets(buf, 1024, ptr)!=NULL) { + strcat(result,buf); + i++; + if (i == 10) break; + } + fclose(ptr); + } else { + warning("Can't detect CPU info - cannot open %s", path); + } +} + +void strlwr(char *str) { + for (; *str!='\0'; str++) + *str 
= tolower(*str); +} + +int VM_Version::get_feature_flags_by_cpuinfo(int features) { + assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); + + char res[10240]; + int i; + memset(res, '\0', 10240 * sizeof(char)); + read_cpu_info("/proc/cpuinfo", res); + // res is converted to lower case + strlwr(res); + + if (strstr(res, "loongson")) { + // Loongson CPU + features |= CPU_LOONGSON; + + const struct Loongson_Cpuinfo loongson_cpuinfo[] = { + {L_3A1000, "3a1000"}, + {L_3B1500, "3b1500"}, + {L_3A2000, "3a2000"}, + {L_3B2000, "3b2000"}, + {L_3A3000, "3a3000"}, + {L_3B3000, "3b3000"}, + {L_2K1000, "2k1000"}, + {L_UNKNOWN, "unknown"} + }; + + // Loongson Family + int detected = 0; + for (i = 0; i <= L_UNKNOWN; i++) { + switch (i) { + // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed + // test PRID REV in /proc/cpuinfo + // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 + // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 + case L_3A1000: + if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { + features |= CPU_LOONGSON_GS464; + detected++; + //tty->print_cr("3A1000 platform"); + } + break; + case L_3B1500: + if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { + features |= CPU_LOONGSON_GS464; + detected++; + //tty->print_cr("3B1500 platform"); + } + break; + case L_3A2000: + case L_3B2000: + case L_3A3000: + case L_3B3000: + if (strstr(res, loongson_cpuinfo[i].match_str)) { + features |= CPU_LOONGSON_GS464E; + detected++; + //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); + } + break; + case L_2K1000: + if (strstr(res, loongson_cpuinfo[i].match_str)) { + features |= CPU_LOONGSON_GS264; + detected++; + //tty->print_cr("2K1000 platform"); + } + break; + case L_UNKNOWN: + if (detected == 0) { + detected++; + //tty->print_cr("unknown Loongson platform"); + } + break; + default: + ShouldNotReachHere(); + } + } + assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); + } else { // not Loongson + // Not Loongson CPU + //tty->print_cr("MIPS platform"); + } + + if (features & CPU_LOONGSON_GS264) { + features |= CPU_LEXT1; + features |= CPU_LEXT2; + features |= CPU_TGTSYNC; + features |= CPU_ULSYNC; + features |= CPU_MSA1_0; + features |= CPU_LSX1; + } else if (features & CPU_LOONGSON_GS464) { + features |= CPU_LEXT1; + features |= CPU_LLSYNC; + features |= CPU_TGTSYNC; + } else if (features & CPU_LOONGSON_GS464E) { + features |= CPU_LEXT1; + features |= CPU_LEXT2; + features |= CPU_LEXT3; + features |= CPU_TGTSYNC; + features |= CPU_ULSYNC; + } else if (features & CPU_LOONGSON) { + // unknow loongson + features |= CPU_LLSYNC; + features |= CPU_TGTSYNC; + features |= CPU_ULSYNC; + } + VM_Version::_cpu_info_is_initialized = true; + + return features; +} + +void VM_Version::get_processor_features() { + + clean_cpuFeatures(); + + // test if cpucfg instruction is supported + VM_Version::_is_determine_cpucfg_supported_running = true; + __asm__ __volatile__( + ".insn \n\t" + ".word (0xc8080118)\n\t" // cpucfg zero, zero + : + : + : + ); + VM_Version::_is_determine_cpucfg_supported_running = false; + + if (supports_cpucfg()) { + get_cpu_info_stub(&_cpuid_info); + _cpuFeatures = get_feature_flags_by_cpucfg(); + // Only Loongson CPUs support cpucfg + _cpuFeatures |= CPU_LOONGSON; + } else { + _cpuFeatures = get_feature_flags_by_cpuinfo(0); + } + + _supports_cx8 = true; + + if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { + FLAG_SET_CMDLINE(uintx, 
MaxGCPauseMillis, 650); + } + +#ifdef COMPILER2 + if (MaxVectorSize > 0) { + if (!is_power_of_2(MaxVectorSize)) { + warning("MaxVectorSize must be a power of 2"); + MaxVectorSize = 8; + } + if (MaxVectorSize > 0 && supports_ps()) { + MaxVectorSize = 8; + } else { + MaxVectorSize = 0; + } + } + // + // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. + // Vector optimization was closed by default. + // The reasons: + // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. + // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. + // + if (FLAG_IS_DEFAULT(MaxVectorSize)) { + MaxVectorSize = 0; + } + +#endif + + if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 1000); + } + } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 2000); + } + } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 3000); + } + } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 4000); + } + } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 10000); + } + } else { + assert(false, "Should Not Reach Here, what is the cpu type?"); + if (FLAG_IS_DEFAULT(UseSyncLevel)) { + FLAG_SET_DEFAULT(UseSyncLevel, 10000); + } + } + + if (supports_lext1()) { + if (FLAG_IS_DEFAULT(UseLEXT1)) { + FLAG_SET_DEFAULT(UseLEXT1, true); + } + } else if (UseLEXT1) { + warning("LEXT1 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLEXT1, false); + } + + if (supports_lext2()) { + if (FLAG_IS_DEFAULT(UseLEXT2)) { + FLAG_SET_DEFAULT(UseLEXT2, true); + } + } else if (UseLEXT2) { + warning("LEXT2 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLEXT2, false); + } + + if (supports_lext3()) { + if (FLAG_IS_DEFAULT(UseLEXT3)) { + FLAG_SET_DEFAULT(UseLEXT3, true); + } + } else if (UseLEXT3) { + warning("LEXT3 instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseLEXT3, false); + } + + if (UseLEXT2) { + if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { + FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); + } + } else if (UseCountTrailingZerosInstructionMIPS64) { + if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) + warning("ctz/dctz instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); + } + + if (TieredCompilation) { + if (!FLAG_IS_DEFAULT(TieredCompilation)) + warning("TieredCompilation not supported"); + FLAG_SET_DEFAULT(TieredCompilation, false); + } + + char buf[256]; + bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); + + // A note on the _features_string format: + // There are jtreg tests checking the _features_string for various properties. + // For some strange reason, these tests require the string to contain + // only _lowercase_ characters. Keep that in mind when being surprised + // about the unusual notation of features - and when adding new ones. + // Features may have one comma at the end. 
+ // Furthermore, use one, and only one, separator space between features. + // Multiple spaces are considered separate tokens, messing up everything. + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", + (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), + (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), + (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), + (is_gs264() ? ", gs264 (2k1000)" : ""), + (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), + (supports_dsp() ? ", dsp" : ""), + (supports_ps() ? ", ps" : ""), + (supports_3d() ? ", 3d" : ""), + (supports_mmi() ? ", mmi" : ""), + (supports_msa1_0() ? ", msa1_0" : ""), + (supports_msa2_0() ? ", msa2_0" : ""), + (supports_lsx1() ? ", lsx1" : ""), + (supports_lsx2() ? ", lsx2" : ""), + (supports_lasx() ? ", lasx" : ""), + (supports_lext1() ? ", lext1" : ""), + (supports_lext2() ? ", lext2" : ""), + (supports_lext3() ? ", lext3" : ""), + (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), + (supports_lamo() ? ", lamo" : ""), + (supports_lpixu() ? ", lpixu" : ""), + (needs_llsync() ? ", llsync" : ""), + (needs_tgtsync() ? ", tgtsync": ""), + (needs_ulsync() ? ", ulsync": ""), + (supports_mualp() ? ", mualp" : ""), + UseSyncLevel); + _features_str = strdup(buf); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { + FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); + } + + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); + } + + if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { + FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); + } + + if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + + if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { + warning("SHA intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + + if (UseAES) { + if (!FLAG_IS_DEFAULT(UseAES)) { + warning("AES instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseAES, false); + } + } + + if (UseCRC32Intrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + warning("CRC32Intrinsics instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); + } + } + + if (UseAESIntrinsics) { + if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { + warning("AES intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseAESIntrinsics, false); + } + } + + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } + + if (CriticalJNINatives) { + if (FLAG_IS_CMDLINE(CriticalJNINatives)) { + warning("CriticalJNINatives specified, but not supported in this VM"); + } + FLAG_SET_DEFAULT(CriticalJNINatives, false); + } +} + +void VM_Version::initialize() { + ResourceMark rm; + // Making this stub must be FIRST use of assembler + + stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); + if (stub_blob == NULL) { + vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); + } + CodeBuffer c(stub_blob); + VM_Version_StubGenerator g(&c); + 
get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, + g.generate_get_cpu_info()); + + get_processor_features(); +} diff --git a/hotspot/src/cpu/mips/vm/vm_version_mips.hpp b/hotspot/src/cpu/mips/vm/vm_version_mips.hpp new file mode 100644 index 00000000000..0de01e5f64c --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vm_version_mips.hpp @@ -0,0 +1,221 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP +#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP + +#include "runtime/globals_extension.hpp" +#include "runtime/vm_version.hpp" + + +class VM_Version: public Abstract_VM_Version { +public: + + union Loongson_Cpucfg_Id1 { + uint32_t value; + struct { + uint32_t FP : 1, + FPREV : 3, + MMI : 1, + MSA1 : 1, + MSA2 : 1, + CGP : 1, + WRP : 1, + LSX1 : 1, + LSX2 : 1, + LASX : 1, + R6FXP : 1, + R6CRCP : 1, + R6FPP : 1, + CNT64 : 1, + LSLDR0 : 1, + LSPREF : 1, + LSPREFX : 1, + LSSYNCI : 1, + LSUCA : 1, + LLSYNC : 1, + TGTSYNC : 1, + LLEXC : 1, + SCRAND : 1, + MUALP : 1, + KMUALEn : 1, + ITLBT : 1, + LSUPERF : 1, + SFBP : 1, + CDMAP : 1, + : 1; + } bits; + }; + + union Loongson_Cpucfg_Id2 { + uint32_t value; + struct { + uint32_t LEXT1 : 1, + LEXT2 : 1, + LEXT3 : 1, + LSPW : 1, + LBT1 : 1, + LBT2 : 1, + LBT3 : 1, + LBTMMU : 1, + LPMP : 1, + LPMRev : 3, + LAMO : 1, + LPIXU : 1, + LPIXNU : 1, + LVZP : 1, + LVZRev : 3, + LGFTP : 1, + LGFTRev : 3, + LLFTP : 1, + LLFTRev : 3, + LCSRP : 1, + DISBLKLY : 1, + : 3; + } bits; + }; + +protected: + + enum { + CPU_LOONGSON = (1 << 1), + CPU_LOONGSON_GS464 = (1 << 2), + CPU_LOONGSON_GS464E = (1 << 3), + CPU_LOONGSON_GS264 = (1 << 4), + CPU_MMI = (1 << 11), + CPU_MSA1_0 = (1 << 12), + CPU_MSA2_0 = (1 << 13), + CPU_CGP = (1 << 14), + CPU_LSX1 = (1 << 15), + CPU_LSX2 = (1 << 16), + CPU_LASX = (1 << 17), + CPU_LEXT1 = (1 << 18), + CPU_LEXT2 = (1 << 19), + CPU_LEXT3 = (1 << 20), + CPU_LAMO = (1 << 21), + CPU_LPIXU = (1 << 22), + CPU_LLSYNC = (1 << 23), + CPU_TGTSYNC = (1 << 24), + CPU_ULSYNC = (1 << 25), + CPU_MUALP = (1 << 26), + + //////////////////////add some other feature here////////////////// + } cpuFeatureFlags; + + enum Loongson_Family { + L_3A1000 = 0, + L_3B1500 = 1, + L_3A2000 = 2, + L_3B2000 = 3, + L_3A3000 = 4, + L_3B3000 = 5, + L_2K1000 = 6, + L_UNKNOWN = 7 + }; + + struct Loongson_Cpuinfo { + Loongson_Family id; + const char* const match_str; + }; + + static int _cpuFeatures; + static const char* _features_str; + static volatile bool 
_is_determine_cpucfg_supported_running; + static bool _is_cpucfg_instruction_supported; + static bool _cpu_info_is_initialized; + + struct CpuidInfo { + uint32_t cpucfg_info_id0; + Loongson_Cpucfg_Id1 cpucfg_info_id1; + Loongson_Cpucfg_Id2 cpucfg_info_id2; + uint32_t cpucfg_info_id3; + uint32_t cpucfg_info_id4; + uint32_t cpucfg_info_id5; + uint32_t cpucfg_info_id6; + uint32_t cpucfg_info_id8; + }; + + // The actual cpuid info block + static CpuidInfo _cpuid_info; + + static uint32_t get_feature_flags_by_cpucfg(); + static int get_feature_flags_by_cpuinfo(int features); + static void get_processor_features(); + +public: + // Offsets for cpuid asm stub + static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } + static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } + static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } + static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } + static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } + static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } + static ByteSize Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } + static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } + + static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } + + static void clean_cpuFeatures() { _cpuFeatures = 0; } + + // Initialization + static void initialize(); + + static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } + + static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } + static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } + + static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } + static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } + static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } + static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } + static bool supports_dsp() { return 0; /*not supported yet*/} + static bool supports_ps() { return 0; /*not supported yet*/} + static bool supports_3d() { return 0; /*not supported yet*/} + static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } + static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } + static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } + static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } + static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } + static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } + static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } + static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } + static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } + static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } + static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } + static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } + static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } + static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } + static bool needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } + static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } + + //mips has no such instructions, use ll/sc instead + 
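As the comment above says, there is no single compare-and-exchange instruction here; the VM hand-codes it from LL/SC. For reference only (portable C++, not VM code), a standard atomic compare-exchange expresses the same operation and is typically lowered by the compiler to an ll/sc retry loop plus the required sync barriers on MIPS-family targets:

    #include <atomic>
    #include <cstdint>

    // Portable equivalent of the VM's hand-written cmpxchg: on MIPS this
    // generally compiles down to an ll/sc loop.
    bool cas_word(std::atomic<intptr_t>& cell, intptr_t expected, intptr_t desired) {
      return cell.compare_exchange_strong(expected, desired);
    }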
static bool supports_compare_and_exchange() { return false; } + + static const char* cpu_features() { return _features_str; } + +}; + +#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.cpp b/hotspot/src/cpu/mips/vm/vmreg_mips.cpp new file mode 100644 index 00000000000..86bd74d4305 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vmreg_mips.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "code/vmreg.hpp" + + + +void VMRegImpl::set_regName() { + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { + regName[i++] = reg->name(); + regName[i++] = reg->name(); + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { + regName[i++] = freg->name(); + regName[i++] = freg->name(); + freg = freg->successor(); + } + + for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { + regName[i] = "NON-GPR-FPR"; + } +} diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.hpp b/hotspot/src/cpu/mips/vm/vmreg_mips.hpp new file mode 100644 index 00000000000..6a970ea91aa --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vmreg_mips.hpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP +#define CPU_MIPS_VM_VMREG_MIPS_HPP + +bool is_Register(); +Register as_Register(); + +bool is_FloatRegister(); +FloatRegister as_FloatRegister(); + +#endif // CPU_MIPS_VM_VMREG_MIPS_HPP diff --git a/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp b/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp new file mode 100644 index 00000000000..77e18ce57d2 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vmreg_mips.inline.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); + return VMRegImpl::as_VMReg(encoding() << 1 ); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); +} + +inline bool VMRegImpl::is_Register() { + return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; +} + +inline bool VMRegImpl::is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; +} + +inline Register VMRegImpl::as_Register() { + + assert( is_Register(), "must be"); + // Yuk + return ::as_Register(value() >> 1); +} + +inline FloatRegister VMRegImpl::as_FloatRegister() { + assert( is_FloatRegister(), "must be" ); + // Yuk + assert( is_even(value()), "must be" ); + return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); +} + +inline bool VMRegImpl::is_concrete() { + assert(is_reg(), "must be"); + if(is_Register()) return true; + if(is_FloatRegister()) return true; + assert(false, "what register?"); + return false; +} + +#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP diff --git a/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp b/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp new file mode 100644 index 00000000000..7779c58e0a6 --- /dev/null +++ b/hotspot/src/cpu/mips/vm/vtableStubs_mips_64.cpp @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_mips_64.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_mips.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + + +// machine-dependent part of VtableStubs: create VtableStub of correct size and +// initialize its code + +#define __ masm-> + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, + oop receiver, + int index); +#endif + +// used by compiler only; reciever in T0. +// used registers : +// Rmethod : receiver klass & method +// NOTE: If this code is used by the C1, the receiver_location is always 0. +// when reach here, receiver in T0, klass in T8 +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + const int gs2_code_length = VtableStub::pd_code_size_limit(true); + VtableStub* s = new(gs2_code_length) VtableStub(true, vtable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), gs2_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + Register t1 = T8, t2 = Rmethod; +#ifndef PRODUCT + if (CountCompiledCalls) { + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + __ lw(t1, AT , 0); + __ addiu(t1, t1, 1); + __ sw(t1, AT,0); + } +#endif + + // get receiver (need to skip return address on top of stack) + //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); + + // get receiver klass + address npe_addr = __ pc(); + //add for compressedoops + __ load_klass(t1, T0); + // compute entry offset (in words) + int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size(); +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // check offset vs vtable length + __ lw(t2, t1, InstanceKlass::vtable_length_offset()*wordSize); + assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); + __ move(AT, vtable_index*vtableEntry::size()); + __ slt(AT, AT, t2); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ move(A2, vtable_index); + __ move(A1, A0); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); + __ bind(L); + } +#endif // PRODUCT + // load methodOop and target address + const Register method = Rmethod; + int offset = entry_offset*wordSize + vtableEntry::method_offset_in_bytes(); + guarantee(Assembler::is_simm16(offset), "not a signed 16-bit int"); + __ ld_ptr(method, t1, offset); + if (DebugVtables) { + Label L; + __ beq(method, R0, 
L); + __ delayed()->nop(); + __ ld(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("Vtable entry is NULL"); + __ bind(L); + } + // T8: receiver klass + // T0: receiver + // Rmethod: methodOop + // T9: entry + address ame_addr = __ pc(); + __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); + __ jr(T9); + __ delayed()->nop(); + masm->flush(); + s->set_exception_points(npe_addr, ame_addr); + return s; +} + + +// used registers : +// T1 T2 +// when reach here, the receiver in T0, klass in T1 +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Note well: pd_code_size_limit is the absolute minimum we can get + // away with. If you add code here, bump the code stub size + // returned by pd_code_size_limit! + const int gs2_code_length = VtableStub::pd_code_size_limit(false); + VtableStub* s = new(gs2_code_length) VtableStub(false, itable_index); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), gs2_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + // we T8,T9 as temparary register, they are free from register allocator + Register t1 = T8, t2 = T2; + // Entry arguments: + // T1: Interface + // T0: Receiver + +#ifndef PRODUCT + if (CountCompiledCalls) { + __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); + __ lw(T8, AT, 0); + __ addiu(T8, T8,1); + __ sw(T8, AT, 0); + } +#endif /* PRODUCT */ + const Register holder_klass_reg = T1; // declaring interface klass (DECC) + const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) + const Register icholder_reg = T1; + __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); + __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); + + // get receiver klass (also an implicit null-check) + address npe_addr = __ pc(); + __ load_klass(t1, T0); + { + // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. 
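Since lookup_interface_method cannot be reused, the stub open-codes the itable walk that follows (it runs the scan twice, once for the resolved klass and once for the declaring interface). A simplified C++ sketch of that lookup; the struct below is an illustration, not the real itableOffsetEntry layout:

    struct Klass;                           // opaque stand-in
    struct ItableOffsetEntry {              // simplified itable offset record
      const Klass* interface_klass;         // a null klass marks the end of the table
      int          offset;                  // byte offset of that interface's method block
    };

    // Returns the method-block offset for 'iface', or -1 if the receiver's class
    // does not implement it (the generated stub handles that failure differently).
    int find_itable_offset(const ItableOffsetEntry* table, const Klass* iface) {
      for (int i = 0; ; i++) {
        if (table[i].interface_klass == nullptr) return -1;
        if (table[i].interface_klass == iface)   return table[i].offset;
      }
    }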
+ const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); + assert(Assembler::is_simm16(base), "change this code"); + __ daddiu(t2, t1, base); + assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); + __ lw(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); + __ dsll(AT, AT, Address::times_8); + __ daddu(t2, t2, AT); + if (HeapWordsPerLong > 1) { + __ round_to(t2, BytesPerLong); + } + + Label hit, entry; + assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); + __ bind(entry); + +#ifdef ASSERT + // Check that the entry is non-null + if (DebugVtables) { + Label L; + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ lw(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("null entry point found in itable's offset table"); + __ bind(L); + } +#endif + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, resolved_klass_reg, entry); + __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); + + } + + // add for compressedoops + __ load_klass(t1, T0); + // compute itable entry offset (in words) + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); + assert(Assembler::is_simm16(base), "change this code"); + __ daddiu(t2, t1, base); + assert(Assembler::is_simm16(InstanceKlass::vtable_length_offset() * wordSize), "change this code"); + __ lw(AT, t1, InstanceKlass::vtable_length_offset() * wordSize); + __ dsll(AT, AT, Address::times_8); + __ daddu(t2, t2, AT); + if (HeapWordsPerLong > 1) { + __ round_to(t2, BytesPerLong); + } + + Label hit, entry; + assert(Assembler::is_simm16(itableOffsetEntry::size() * wordSize), "change this code"); + __ bind(entry); + +#ifdef ASSERT + // Check that the entry is non-null + if (DebugVtables) { + Label L; + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ lw(AT, t1, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, R0, L); + __ delayed()->nop(); + __ stop("null entry point found in itable's offset table"); + __ bind(L); + } +#endif + assert(Assembler::is_simm16(itableOffsetEntry::interface_offset_in_bytes()), "change this code"); + __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); + __ bne(AT, holder_klass_reg, entry); + __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); + + // We found a hit, move offset into T9 + __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); + + // Compute itableMethodEntry. 
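The address arithmetic computed here, spelled out as plain C++ for clarity (the constants are passed in rather than taken from the real itableMethodEntry, so the names are illustrative only):

    #include <cstdint>

    // Where the Method* slot lives once the itable scan has produced an offset.
    intptr_t method_slot(intptr_t klass_base,        // t1: receiver klass
                         intptr_t itable_offset,     // t2: offset found by the scan above
                         int      itable_index,      // constant for this call site
                         int      entry_size_bytes,  // itableMethodEntry::size() * wordSize
                         int      method_field_off)  // itableMethodEntry::method_offset_in_bytes()
    {
      return klass_base + itable_offset
           + (intptr_t)itable_index * entry_size_bytes
           + method_field_off;
    }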
+ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + + itableMethodEntry::method_offset_in_bytes(); + + // Get methodOop and entrypoint for compiler + const Register method = Rmethod; + __ dsll(AT, t2, Address::times_1); + __ addu(AT, AT, t1); + guarantee(Assembler::is_simm16(method_offset), "not a signed 16-bit int"); + __ ld_ptr(method, AT, method_offset); + +#ifdef ASSERT + if (DebugVtables) { + Label L1; + __ beq(method, R0, L1); + __ delayed()->nop(); + __ ld(AT, method,in_bytes(Method::from_compiled_offset())); + __ bne(AT, R0, L1); + __ delayed()->nop(); + __ stop("methodOop is null"); + __ bind(L1); + } +#endif // ASSERT + + // Rmethod: methodOop + // T0: receiver + // T9: entry point + address ame_addr = __ pc(); + __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); + __ jr(T9); + __ delayed()->nop(); + masm->flush(); + s->set_exception_points(npe_addr, ame_addr); + return s; +} + +// NOTE : whenever you change the code above, dont forget to change the const here +int VtableStub::pd_code_size_limit(bool is_vtable_stub) { + if (is_vtable_stub) { + return ( DebugVtables ? 600 : 28) + (CountCompiledCalls ? 24 : 0)+ + (UseCompressedOops ? 16 : 0); + } else { + return ( DebugVtables ? 636 : 152) + (CountCompiledCalls ? 24 : 0)+ + (UseCompressedOops ? 32 : 0); + } +} + +int VtableStub::pd_code_alignment() { + return wordSize; +} diff --git a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp index c1c053e66c3..5c90df1079f 100644 --- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp @@ -1513,6 +1513,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { } } +void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { + ShouldNotReachHere(); +} + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { LIR_Opr src = op->in_opr(); LIR_Opr dest = op->result_opr(); @@ -2102,6 +2106,12 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L } +void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, + LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { + ShouldNotReachHere(); +} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp index 92b73e1c712..45da327efb7 100644 --- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp @@ -242,20 +242,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); } -void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { +template +void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { __ cmp_mem_int(condition, base, disp, c, info); + __ branch(condition, T_INT, tgt); } +// Explicit instantiation for all supported types. 
+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { - __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); -} - - -void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info) { +template +void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); + __ branch(condition, type, tgt); } +// Explicit instantiation for all supported types. +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); +template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { if (tmp->is_valid() && c > 0 && c < max_jint) { diff --git a/hotspot/src/os/linux/vm/os_linux.cpp b/hotspot/src/os/linux/vm/os_linux.cpp index ba1bce4239a..42a73ea5aad 100644 --- a/hotspot/src/os/linux/vm/os_linux.cpp +++ b/hotspot/src/os/linux/vm/os_linux.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + // no precompiled headers #include "classfile/classLoader.hpp" #include "classfile/systemDictionary.hpp" @@ -1969,7 +1975,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) {EM_ALPHA, EM_ALPHA, ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"}, {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"}, {EM_MIPS, EM_MIPS, ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"}, + {EM_MIPS, EM_MIPS, ELFCLASS64, ELFDATA2LSB, (char*)"MIPS64 LE"}, {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, +#if defined (LOONGARCH64) + {EM_LOONGARCH, EM_LOONGARCH, ELFCLASS64, ELFDATA2LSB, (char*)"LOONGARCH64"}, +#endif {EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, }; @@ -1984,6 +1994,8 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) static Elf32_Half running_arch_code=EM_SPARCV9; #elif (defined __sparc) && (!defined _LP64) static Elf32_Half running_arch_code=EM_SPARC; + #elif (defined MIPS64) + static Elf32_Half running_arch_code=EM_MIPS; #elif (defined __powerpc64__) static Elf32_Half running_arch_code=EM_PPC64; #elif (defined __powerpc__) @@ -2004,9 +2016,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) static Elf32_Half running_arch_code=EM_68K; #elif (defined AARCH64) static Elf32_Half running_arch_code=EM_AARCH64; + #elif (defined LOONGARCH64) + static Elf32_Half running_arch_code=EM_LOONGARCH; #else #error Method os::dll_load requires that one of following is defined:\ - IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K, AARCH64 + IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, __mips64, PARISC, M68K, AARCH64 #endif // Identify compatability class for VM's architecture and library's architecture @@ -3513,7 +3527,7 @@ size_t os::Linux::find_large_page_size() { #ifndef ZERO large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) - ARM_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M); + ARM_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M) MIPS64_ONLY(4 * M) LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. // TODO: LA #endif // ZERO FILE *fp = fopen("/proc/meminfo", "r"); @@ -5120,7 +5134,12 @@ jint os::init_2(void) Linux::fast_thread_clock_init(); // Allocate a single page and mark it as readable for safepoint polling +#ifdef OPT_SAFEPOINT + void * p = (void *)(0x10000); + address polling_page = (address) ::mmap(p, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); +#else address polling_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); +#endif guarantee( polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page" ); os::set_polling_page( polling_page ); @@ -5155,13 +5174,20 @@ jint os::init_2(void) // size. Add a page for compiler2 recursion in main thread. // Add in 2*BytesPerWord times page size to account for VM stack during // class initialization depending on 32 or 64 bit VM. + + /* + * 2014/1/2: JDK8 requires larger -Xss option. + * Some application cannot run with -Xss192K. + * We are not sure whether this causes errors, so simply print a warning. 
+ */ + size_t min_stack_allowed_jdk6 = os::Linux::min_stack_allowed; os::Linux::min_stack_allowed = MAX2(os::Linux::min_stack_allowed, (size_t)(StackYellowPages+StackRedPages+StackShadowPages) * Linux::page_size() + (2*BytesPerWord COMPILER2_PRESENT(+1)) * Linux::vm_default_page_size()); size_t threadStackSizeInBytes = ThreadStackSize * K; if (threadStackSizeInBytes != 0 && - threadStackSizeInBytes < os::Linux::min_stack_allowed) { + threadStackSizeInBytes < min_stack_allowed_jdk6) { tty->print_cr("\nThe stack size specified is too small, " "Specify at least %dk", os::Linux::min_stack_allowed/ K); diff --git a/hotspot/src/os/linux/vm/os_perf_linux.cpp b/hotspot/src/os/linux/vm/os_perf_linux.cpp index 0d1f75810af..cbc6c0757c3 100644 --- a/hotspot/src/os/linux/vm/os_perf_linux.cpp +++ b/hotspot/src/os/linux/vm/os_perf_linux.cpp @@ -50,6 +50,12 @@ #ifdef TARGET_ARCH_ppc # include "vm_version_ext_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vm_version_ext_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vm_version_ext_loongarch.hpp" +#endif #include #include diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp new file mode 100644 index 00000000000..5ee0965f426 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/assembler_linux_loongarch.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "runtime/os.hpp" +#include "runtime/threadLocalStorage.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T4 RT4 +#define T5 RT5 +#define T6 RT6 +#define T7 RT7 +#define T8 RT8 + +void MacroAssembler::get_thread(Register thread) { +#ifdef MINIMIZE_RAM_USAGE + Register tmp; + + if (thread == AT) + tmp = T9; + else + tmp = AT; + + move(thread, SP); + shr(thread, PAGE_SHIFT); + + push(tmp); + li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); + andr(thread, thread, tmp); + shl(thread, Address::times_ptr); // sizeof(Thread *) + li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); + add_d(tmp, tmp, thread); + ld_ptr(thread, tmp, 0); + pop(tmp); +#else + if (thread != V0) { + push(V0); + } + pushad_except_v0(); + + li(A0, ThreadLocalStorage::thread_index()); + push(S5); + move(S5, SP); + li(AT, -StackAlignmentInBytes); + andr(SP, SP, AT); + // TODO: confirm reloc + call(CAST_FROM_FN_PTR(address, pthread_getspecific), relocInfo::runtime_call_type); + move(SP, S5); + pop(S5); + + popad_except_v0(); + if (thread != V0) { + move(thread, V0); + pop(V0); + } +#endif // MINIMIZE_RAM_USAGE +} diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..69590ba5824 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/atomic_linux_loongarch.inline.hpp @@ -0,0 +1,206 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP + +#include "orderAccess_linux_loongarch.inline.hpp" +#include "runtime/atomic.hpp" +#include "runtime/os.hpp" +#include "vm_version_loongarch.hpp" + +// Implementation of class atomic + +inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } + +inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void**)dest = store_value; } + +inline jlong Atomic::load (volatile jlong* src) { return *src; } + +///////////implementation of Atomic::add*///////////////// +inline jint Atomic::add (jint add_value, volatile jint* dest) { + //TODO LA opt amadd + jint __ret, __tmp; + __asm__ __volatile__ ( + "1: ll.w %[__ret], %[__dest] \n\t" + " add.w %[__tmp], %[__val], %[__ret] \n\t" + " sc.w %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (add_value) + : "memory" + ); + + return add_value + __ret; +} + +inline intptr_t Atomic::add_ptr (intptr_t add_value, volatile intptr_t* dest) { + //TODO LA opt amadd + jint __ret, __tmp; + __asm__ __volatile__ ( + "1: ll.d %[__ret], %[__dest] \n\t" + " add.d %[__tmp], %[__val], %[__ret] \n\t" + " sc.d %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (add_value) + : "memory" + ); + + return add_value + __ret; +} + +inline void* Atomic::add_ptr (intptr_t add_value, volatile void* dest) { + return (void*)add_ptr((intptr_t)add_value, (volatile intptr_t*)dest); +} + +///////////implementation of Atomic::inc*///////////////// +inline void Atomic::inc (volatile jint* dest) { (void)add(1, dest); } +inline void Atomic::inc_ptr (volatile intptr_t* dest) { (void)add_ptr(1, dest); } +inline void Atomic::inc_ptr (volatile void* dest) { (void)inc_ptr((volatile intptr_t*)dest); } + +///////////implementation of Atomic::dec*///////////////// +inline void Atomic::dec (volatile jint* dest) { (void)add(-1, dest); } +inline void Atomic::dec_ptr (volatile intptr_t* dest) { (void)add_ptr(-1, dest); } +inline void Atomic::dec_ptr (volatile void* dest) { (void)dec_ptr((volatile intptr_t*)dest); } + + +///////////implementation of Atomic::xchg*///////////////// +inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) { + jint __ret, __tmp; + + __asm__ __volatile__ ( + "1: ll.w %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" + " sc.w %[__tmp], %[__dest] 
\n\t" + " beqz %[__tmp], 1b \n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) + : "memory" + ); + + return __ret; +} + +inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { + intptr_t __ret, __tmp; + __asm__ __volatile__ ( + "1: ll.d %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" + " sc.d %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) + : "memory" + ); + return __ret; + +} + +inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) { + return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest); +} + +///////////implementation of Atomic::cmpxchg*///////////////// +inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value) { + jint __prev, __cmp; + + __asm__ __volatile__ ( + "1: ll.w %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $r0 \n\t" + " move %[__cmp], %[__new] \n\t" + " sc.w %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" + " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + + return __prev; +} + +inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value) { + jlong __prev, __cmp; + + __asm__ __volatile__ ( + "1: ll.d %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $r0 \n\t" + " move %[__cmp], %[__new] \n\t" + " sc.d %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" + " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + return __prev; +} + +inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) { + intptr_t __prev, __cmp; + __asm__ __volatile__ ( + "1: ll.d %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $r0 \n\t" + " move %[__cmp], %[__new] \n\t" + " sc.d %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + "2: \n\t" + " dbar 0x700 \n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "ZC" (*(volatile intptr_t*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + + return __prev; +} + +inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) { + return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value); +} + +#endif // OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..4e205c468eb --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/bytes_linux_loongarch.inline.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
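The contract of the LL/SC compare-and-swap loops above is: return the value observed at dest, and the store happened only if that value equals compare_value, with the dbar on the exit path providing conservative ordering. A rough, portable sketch of the same semantics using the GCC/Clang __atomic builtins (__ATOMIC_SEQ_CST chosen conservatively; this illustrates the contract, it is not the port's implementation):

#include <stdint.h>

// Returns the old value at dest; the exchange took place iff the return
// value equals compare_value.
inline int32_t cmpxchg32(int32_t exchange_value, volatile int32_t* dest, int32_t compare_value) {
  int32_t expected = compare_value;
  __atomic_compare_exchange_n(dest, &expected, exchange_value,
                              false /* strong */, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return expected;
}

inline int64_t cmpxchg64(int64_t exchange_value, volatile int64_t* dest, int64_t compare_value) {
  int64_t expected = compare_value;
  __atomic_compare_exchange_n(dest, &expected, exchange_value,
                              false /* strong */, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return expected;
}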
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP + +#include + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa. +inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } +inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } +inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } + +#endif // OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..7d6e11a9356 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/copy_linux_loongarch.inline.hpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP + +static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_words(from, to, count); +} + +static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { + pd_conjoint_bytes(from, to, count); +} + +static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); + copy_conjoint_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + +#endif // OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp new file mode 100644 index 00000000000..8ec3fa8239a --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/globals_linux_loongarch.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); +define_pd_global(intx, ThreadStackSize, 2048); +define_pd_global(intx, VMThreadStackSize, 2048); + +define_pd_global(intx, CompilerThreadStackSize, 0); // 0 => use system default + +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx,HeapBaseMinAddress, 2*G); + +#endif // OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..3e050c8d094 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/orderAccess_linux_loongarch.inline.hpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP + +#include "runtime/atomic.hpp" +#include "runtime/orderAccess.hpp" +#include "runtime/os.hpp" +#include "vm_version_loongarch.hpp" + +#define inlasm_sync(v) if (os::is_ActiveCoresMP()) \ + __asm__ __volatile__ ("nop" : : : "memory"); \ + else \ + __asm__ __volatile__ ("dbar %0" : :"K"(v) : "memory"); + +inline void OrderAccess::loadload() { inlasm_sync(0x15); } +inline void OrderAccess::storestore() { inlasm_sync(0x1a); } +inline void OrderAccess::loadstore() { inlasm_sync(0x16); } +inline void OrderAccess::storeload() { inlasm_sync(0x19); } + +inline void OrderAccess::acquire() { inlasm_sync(0x14); } +inline void OrderAccess::release() { inlasm_sync(0x12); } +inline void OrderAccess::fence() { inlasm_sync(0x10); } + +//implementation of load_acquire +inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; } +inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; } +inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; } +inline jlong OrderAccess::load_acquire(volatile jlong* p) { jlong tmp = *p; acquire(); return tmp; } +inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte data = *p; acquire(); return data; } +inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort data = *p; acquire(); return data; } +inline juint OrderAccess::load_acquire(volatile juint* p) { juint data = *p; acquire(); return data; } +inline julong OrderAccess::load_acquire(volatile julong* p) { julong tmp = *p; acquire(); return tmp; } +inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat data = *p; acquire(); return data; } +inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { jdouble tmp = *p; acquire(); return tmp; } + +//implementation of load_ptr_acquire +inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { intptr_t data = *p; acquire(); return data; } +inline void* OrderAccess::load_ptr_acquire(volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } +inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } + +//implementation of release_store +inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jlong* p, jlong v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jushort* p, jushort v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile juint* p, juint v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile julong* p, julong v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release(); *p = v; } + +//implementation of release_store_ptr +inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release(); *p = v; } +inline void OrderAccess::release_store_ptr(volatile void* p, 
void* v) { release(); *(void* volatile *)p = v; } + +//implementation of store_fence +inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } +inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } +inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } + +//implementation of store_ptr_fence +inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } +inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } + +//implementation of release_store_fence +inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } + +//implementaion of release_store_ptr_fence +inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } +inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } + +#undef inlasm_sync + +#endif // OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp new file mode 100644 index 00000000000..f2c3df84a1d --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.cpp @@ -0,0 +1,750 @@ +/* + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// no precompiled headers +#include "asm/macroAssembler.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "jvm_linux.h" +#include "memory/allocation.inline.hpp" +#include "mutex_linux.inline.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm.h" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" +#include "utilities/debug.hpp" +#include "compiler/disassembler.hpp" +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +#define REG_SP 3 +#define REG_FP 22 + +address os::current_stack_pointer() { + register void *sp __asm__ ("$r3"); + return (address) sp; +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + + return (char*) -1; +} + +void os::initialize_thread(Thread* thr) { +// Nothing to do. +} + +address os::Linux::ucontext_get_pc(ucontext_t * uc) { + return (address)uc->uc_mcontext.__pc; +} + +intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; +} + +intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread +// is currently interrupted by SIGPROF. +// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal +// frames. Currently we don't do that on Linux, so it's the same as +// os::fetch_frame_from_context(). 
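ucontext_get_pc/sp/fp above read the interrupted context straight out of glibc's LoongArch mcontext_t: the __pc field plus __gregs[3] and __gregs[22] for SP and FP. A small standalone program that performs the same read from a signal handler; it assumes the glibc LoongArch64 ucontext layout used here and is a demonstration only (printf in a handler is not async-signal-safe):

#include <csignal>
#include <cstdio>
#include <ucontext.h>

static const int REG_SP_IDX = 3;   // $r3, matches REG_SP above
static const int REG_FP_IDX = 22;  // $r22, matches REG_FP above

static void dump_context(int, siginfo_t*, void* ucVoid) {
  ucontext_t* uc = static_cast<ucontext_t*>(ucVoid);
  std::printf("pc=%#lx sp=%#lx fp=%#lx\n",
              (unsigned long) uc->uc_mcontext.__pc,
              (unsigned long) uc->uc_mcontext.__gregs[REG_SP_IDX],
              (unsigned long) uc->uc_mcontext.__gregs[REG_FP_IDX]);
}

int main() {
  struct sigaction sa;
  sa.sa_sigaction = dump_context;
  sa.sa_flags = SA_SIGINFO;
  sigemptyset(&sa.sa_mask);
  sigaction(SIGUSR1, &sa, NULL);
  raise(SIGUSR1);   // handler prints the interrupted pc/sp/fp
  return 0;
}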
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, + ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + + assert(thread != NULL, "just checking"); + assert(ret_sp != NULL, "just checking"); + assert(ret_fp != NULL, "just checking"); + + return os::fetch_frame_from_context(uc, ret_sp, ret_fp); +} + +ExtendedPC os::fetch_frame_from_context(void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + ExtendedPC epc; + ucontext_t* uc = (ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); + if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); + } else { + // construct empty ExtendedPC for return value checking + epc = ExtendedPC(NULL); + if (ret_sp) *ret_sp = (intptr_t *)NULL; + if (ret_fp) *ret_fp = (intptr_t *)NULL; + } + + return epc; +} + +frame os::fetch_frame_from_context(void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); + return frame(sp, fp, epc.pc()); +} + +// By default, gcc always save frame pointer on stack. It may get +// turned off by -fomit-frame-pointer, +frame os::get_sender_for_C_frame(frame* fr) { + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); +} + +//intptr_t* _get_previous_fp() { +intptr_t* __attribute__((noinline)) os::get_previous_fp() { + return (intptr_t*)__builtin_frame_address(0); +} + +frame os::current_frame() { + intptr_t* fp = (intptr_t*)get_previous_fp(); + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); + if (os::is_first_C_frame(&myframe)) { + // stack is not walkable + return frame(); + } else { + return os::get_sender_for_C_frame(&myframe); + } +} + +extern "C" JNIEXPORT int +JVM_handle_linux_signal(int sig, + siginfo_t* info, + void* ucVoid, + int abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", + info->si_signo, + info->si_code, + info->si_errno, + info->si_addr); +#endif + + ucontext_t* uc = (ucontext_t*) ucVoid; + + Thread* t = ThreadLocalStorage::get_thread_slow(); + + SignalHandlerMark shm(t); + + // Note: it's not uncommon that JNI code uses signal/sigset to install + // then restore certain signal handler (e.g. to temporarily block SIGPIPE, + // or have a SIGILL handler when detecting CPU type). When that happens, + // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To + // avoid unnecessary crash when libjsig is not preloaded, try handle signals + // that do not require siginfo/ucontext first. 
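get_previous_fp() and current_frame() above depend on gcc keeping a frame pointer and on the saved {return address, caller fp} pair sitting just below it. A self-contained sketch of that walk; the fp[-1]/fp[-2] slot positions assume the LoongArch/MIPS-style frame layout this port relies on and code built with -fno-omit-frame-pointer, so treat it as illustrative rather than a general unwinder:

#include <cstddef>
#include <cstdio>

// Walk at most max_frames caller frames by chasing saved frame pointers.
__attribute__((noinline)) static void walk_frames(int max_frames) {
  void** fp = static_cast<void**>(__builtin_frame_address(0));
  for (int i = 0; i < max_frames && fp != NULL; i++) {
    void*  ra      = fp[-1];                       // saved return address
    void** prev_fp = static_cast<void**>(fp[-2]);  // caller's frame pointer
    std::printf("#%d  pc=%p  fp=%p\n", i, ra, (void*) fp);
    if (prev_fp <= fp) break;                      // stack grows down; a non-increasing
    fp = prev_fp;                                  // link means the chain has ended
  }
}

__attribute__((noinline)) static void leaf() { walk_frames(4); }

int main() {
  leaf();
  return 0;
}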
+ + if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { + // allow chained handler to go first + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } else { + if (PrintMiscellaneous && (WizardMode || Verbose)) { + warning("Ignoring SIGPIPE - see bug 4229104"); + } + return true; + } + } + + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { + if (t != NULL ){ + if(t->is_Java_thread()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a java thread"); +#endif + thread = (JavaThread*)t; + } + else if(t->is_VM_thread()){ +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a VM thread\n"); +#endif + vmthread = (VMThread *)t; + } + } + } + + // decide if this trap can be handled by a stub + address stub = NULL; + address pc = NULL; + + pc = (address) os::Linux::ucontext_get_pc(uc); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("pc=%lx", pc); + os::print_context(tty, uc); +#endif + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Linux::ucontext_get_pc(uc); + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + address addr = (address) info->si_addr; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("handle all stack overflow variations: "); + /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", + addr, + thread->stack_base(), + thread->stack_base() - thread->stack_size()); + */ +#endif + + // check if fault address is within thread stack + if (addr < thread->stack_base() && + addr >= thread->stack_base() - thread->stack_size()) { + // stack overflow +#ifdef PRINT_SIGNAL_HANDLE + tty->print("stack exception check \n"); +#endif + if (thread->in_stack_yellow_zone(addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in yellow zone\n"); +#endif + thread->disable_stack_yellow_zone(); + if (thread->thread_state() == _thread_in_Java) { + // Throw a stack overflow exception. Guard pages will be reenabled + // while unwinding the stack. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in java\n"); +#endif + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); + } else { + // Thread was in the vm or native code. Return and try to finish. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in vm or native codes and return\n"); +#endif + return 1; + } + } else if (thread->in_stack_red_zone(addr)) { + // Fatal red zone violation. Disable the guard pages and fall through + // to handle_unexpected_exception way down below. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in red zone\n"); +#endif + thread->disable_stack_red_zone(); + tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + + // This is a likely cause, but hard to verify. Let's just print + // it as a hint. + tty->print_raw_cr("Please check if any of your loaded .so files has " + "enabled executable stack (see man page execstack(8))"); + } else { + // Accessing stack address below sp may cause SEGV if current + // thread has MAP_GROWSDOWN stack. This should only happen when + // current thread was created by user code with MAP_GROWSDOWN flag + // and then attached to VM. See notes in os_linux.cpp. 
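The stack-overflow branches above boil down to classifying the faulting address against the thread's stack: off the stack entirely, in the red zone at the very bottom, in the yellow guard zone just above it, or in the normal stack. A tiny sketch of that classification; the page and zone sizes are placeholders (the VM derives them from StackRedPages/StackYellowPages and the real page size):

#include <stddef.h>
#include <stdint.h>

// Placeholder sizes, not the VM's actual values.
static const size_t PAGE_SZ     = 16 * 1024;
static const size_t RED_ZONE    = 1 * PAGE_SZ;
static const size_t YELLOW_ZONE = 2 * PAGE_SZ;

enum StackRegion { NOT_ON_STACK, RED_ZONE_HIT, YELLOW_ZONE_HIT, NORMAL_STACK };

// The stack occupies [stack_base - stack_size, stack_base); the red zone sits
// at the very bottom, the yellow zone directly above it.
static StackRegion classify(uintptr_t addr, uintptr_t stack_base, size_t stack_size) {
  uintptr_t bottom = stack_base - stack_size;
  if (addr >= stack_base || addr < bottom)       return NOT_ON_STACK;
  if (addr <  bottom + RED_ZONE)                 return RED_ZONE_HIT;
  if (addr <  bottom + RED_ZONE + YELLOW_ZONE)   return YELLOW_ZONE_HIT;
  return NORMAL_STACK;
}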
+#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is neither in yellow zone nor in the red one\n"); +#endif + if (thread->osthread()->expanding_stack() == 0) { + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); + } else { + fatal("recursive segv. expanding stack."); + } + } + } + } // sig == SIGSEGV + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub +#ifdef PRINT_SIGNAL_HANDLE + tty->print("java thread running in java code\n"); +#endif + + // Handle signal from NativeJump::patch_verified_entry(). + if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); +#endif + stub = SharedRuntime::get_handle_wrong_method_stub(); + } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); +#endif + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault + // here if the underlying file has been truncated. + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("cb = %lx, nm = %lx\n", cb, nm); +#endif + if (nm != NULL && nm->has_unsafe_access()) { + stub = StubRoutines::handler_for_unsafe_access(); + } + } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { + // HACK: si_code does not work on linux 2.2.12-20!!! + int op = pc[0] & 0x3f; + int op1 = pc[3] & 0x3f; + //FIXME, Must port to LA code!! + switch (op) { + case 0x1e: //ddiv + case 0x1f: //ddivu + case 0x1a: //div + case 0x1b: //divu + case 0x34: //trap + // In LA, div_by_zero exception can only be triggered by explicit 'trap'. + stub = SharedRuntime::continuation_for_implicit_exception(thread, + pc, + SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); + break; + default: + // TODO: handle more cases if we are using other x86 instructions + // that can generate SIGFPE signal on linux. + tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); + //fatal("please update this code."); + } + } else if (sig == SIGSEGV && + !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("continuation for implicit exception\n"); +#endif + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); +#endif + } + } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("SIGBUS in vm thread \n"); +#endif + stub = StubRoutines::handler_for_unsafe_access(); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. 
+ if ((sig == SIGSEGV) || (sig == SIGBUS)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("jni fast get trap: "); +#endif + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; + } +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("addr = %d, stub = %lx", addr, stub); +#endif + } + + // Check to see if we caught the safepoint code in the + // process of write protecting the memory serialization page. + // It write enables the page immediately after protecting it + // so we can just return to retry the write. + if ((sig == SIGSEGV) && + os::is_memory_serialize_page(thread, (address) info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("write protecting the memory serialiazation page\n"); +#endif + // Block current thread until the memory serialize page permission restored. + os::block_on_serialize_page_trap(); + return true; + } + } + + if (stub != NULL) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("resolved stub=%lx\n",stub); +#endif + // save all thread context in case we need to restore it + if (thread != NULL) thread->set_saved_exception_pc(pc); + + uc->uc_mcontext.__pc = (greg_t)stub; + return true; + } + + // signal-chaining + if (os::Linux::chained_handler(sig, info, ucVoid)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("signal chaining\n"); +#endif + return true; + } + + if (!abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("abort becauce of unrecognized\n"); +#endif + // caller wants another chance, so give it to him + return false; + } + + if (pc == NULL && uc != NULL) { + pc = os::Linux::ucontext_get_pc(uc); + } + + // unmask current signal + sigset_t newset; + sigemptyset(&newset); + sigaddset(&newset, sig); + sigprocmask(SIG_UNBLOCK, &newset, NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("VMError in signal handler\n"); +#endif + VMError err(t, sig, pc, info, ucVoid); + err.report_and_die(); + + ShouldNotReachHere(); + return true; // Mute compiler +} + +void os::Linux::init_thread_fpu_state(void) { +} + +int os::Linux::get_fpu_control_word(void) { + return 0; // mute compiler +} + +void os::Linux::set_fpu_control_word(int fpu_control) { +} + +bool os::is_allocatable(size_t bytes) { + + if (bytes < 2 * G) { + return true; + } + + char* addr = reserve_memory(bytes, NULL); + + if (addr != NULL) { + release_memory(addr, bytes); + } + + return addr != NULL; +} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +size_t os::Linux::min_stack_allowed = 96 * K; + +// Test if pthread library can support variable thread stack size. LinuxThreads +// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads +// in floating stack mode and NPTL support variable stack size. +bool os::Linux::supports_variable_stack_size() { + if (os::Linux::is_NPTL()) { + // NPTL, yes + return true; + + } else { + // Note: We can't control default stack size when creating a thread. + // If we use non-default stack size (pthread_attr_setstacksize), both + // floating stack and non-floating stack LinuxThreads will return the + // same value. This makes it impossible to implement this function by + // detecting thread stack size directly. + // + // An alternative approach is to check %gs. Fixed-stack LinuxThreads + // do not use %gs, so its value is 0. Floating-stack LinuxThreads use + // %gs (either as LDT selector or GDT selector, depending on kernel) + // to access thread specific data. 
+ // + // Note that %gs is a reserved glibc register since early 2001, so + // applications are not allowed to change its value (Ulrich Drepper from + // Redhat confirmed that all known offenders have been modified to use + // either %fs or TSD). In the worst case scenario, when VM is embedded in + // a native application that plays with %gs, we might see non-zero %gs + // even LinuxThreads is running in fixed stack mode. As the result, we'll + // return true and skip _thread_safety_check(), so we may not be able to + // detect stack-heap collisions. But otherwise it's harmless. + // + return false; + } +} + +// return default stack size for thr_type +size_t os::Linux::default_stack_size(os::ThreadType thr_type) { + // default stack size (compiler thread needs larger stack) + size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); + return s; +} + +size_t os::Linux::default_guard_size(os::ThreadType thr_type) { + // Creating guard page is very expensive. Java thread has HotSpot + // guard page, only enable glibc guard page for non-Java threads. + return (thr_type == java_thread ? 0 : page_size()); +} + +// Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ JavaThread created by VM does not have glibc +// | glibc guard page | - guard, attached Java thread usually has +// | |/ 1 page glibc guard. +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | HotSpot Guard Pages | - red and yellow pages +// | |/ +// +------------------------+ JavaThread::stack_yellow_zone_base() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// Non-Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ +// | glibc guard page | - usually 1 page +// | |/ +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from +// pthread_attr_getstack() + +static void current_stack_region(address * bottom, size_t * size) { + if (os::is_primordial_thread()) { + // primordial thread needs special handling because pthread_getattr_np() + // may return bogus value. 
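For every thread except the primordial one, the code that follows asks glibc for the exact stack bounds via pthread_getattr_np() and pthread_attr_getstack(). A standalone example of the same query (pthread_getattr_np is a GNU extension; compile with -pthread on Linux/glibc):

#include <cstddef>
#include <cstdio>
#include <pthread.h>

// Print the current thread's stack bottom (lowest address), size and top,
// exactly as reported by glibc.
static void* print_stack_region(void*) {
  pthread_attr_t attr;
  if (pthread_getattr_np(pthread_self(), &attr) != 0) {
    std::perror("pthread_getattr_np");
    return NULL;
  }
  void*  bottom = NULL;
  size_t size   = 0;
  if (pthread_attr_getstack(&attr, &bottom, &size) == 0) {
    std::printf("stack: bottom=%p size=%zu top=%p\n",
                bottom, size, (void*) ((char*) bottom + size));
  }
  pthread_attr_destroy(&attr);
  return NULL;
}

int main() {
  pthread_t t;
  pthread_create(&t, NULL, print_stack_region, NULL);  // query from a pthread-created thread
  pthread_join(t, NULL);
  return 0;
}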
+ *bottom = os::Linux::initial_thread_stack_bottom(); + *size = os::Linux::initial_thread_stack_size(); + } else { + pthread_attr_t attr; + + int rslt = pthread_getattr_np(pthread_self(), &attr); + + // JVM needs to know exact stack location, abort if it fails + if (rslt != 0) { + if (rslt == ENOMEM) { + vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); + } else { + fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt)); + } + } + + if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { + fatal("Can not locate current stack attributes!"); + } + + pthread_attr_destroy(&attr); + + } + assert(os::current_stack_pointer() >= *bottom && + os::current_stack_pointer() < *bottom + *size, "just checking"); +} + +address os::current_stack_base() { + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return (bottom + size); +} + +size_t os::current_stack_size() { + // stack size includes normal stack and HotSpot guard pages + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return size; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler +void os::print_register_info(outputStream *st, void *context) { + + ucontext_t *uc = (ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + // this is horrendously verbose but the layout of the registers in the + // // context does not match how we defined our abstract Register set, so + // // we can't just iterate through the gregs area + // + // // this is only for the "general purpose" registers + st->print("ZERO=" ); print_location(st, uc->uc_mcontext.__gregs[0]); + st->print("RA=" ); print_location(st, uc->uc_mcontext.__gregs[1]); + st->print("TP=" ); print_location(st, uc->uc_mcontext.__gregs[2]); + st->print("SP=" ); print_location(st, uc->uc_mcontext.__gregs[3]); + st->cr(); + st->print("A0=" ); print_location(st, uc->uc_mcontext.__gregs[4]); + st->print("A1=" ); print_location(st, uc->uc_mcontext.__gregs[5]); + st->print("A2=" ); print_location(st, uc->uc_mcontext.__gregs[6]); + st->print("A3=" ); print_location(st, uc->uc_mcontext.__gregs[7]); + st->cr(); + st->print("A4=" ); print_location(st, uc->uc_mcontext.__gregs[8]); + st->print("A5=" ); print_location(st, uc->uc_mcontext.__gregs[9]); + st->print("A6=" ); print_location(st, uc->uc_mcontext.__gregs[10]); + st->print("A7=" ); print_location(st, uc->uc_mcontext.__gregs[11]); + st->cr(); + st->print("T0=" ); print_location(st, uc->uc_mcontext.__gregs[12]); + st->print("T1=" ); print_location(st, uc->uc_mcontext.__gregs[13]); + st->print("T2=" ); print_location(st, uc->uc_mcontext.__gregs[14]); + st->print("T3=" ); print_location(st, uc->uc_mcontext.__gregs[15]); + st->cr(); + st->print("T4=" ); print_location(st, uc->uc_mcontext.__gregs[16]); + st->print("T5=" ); print_location(st, uc->uc_mcontext.__gregs[17]); + st->print("T6=" ); print_location(st, uc->uc_mcontext.__gregs[18]); + st->print("T7=" ); print_location(st, uc->uc_mcontext.__gregs[19]); + st->cr(); + st->print("T8=" ); print_location(st, uc->uc_mcontext.__gregs[20]); + st->print("RX=" ); print_location(st, uc->uc_mcontext.__gregs[21]); + st->print("FP=" ); print_location(st, uc->uc_mcontext.__gregs[22]); + st->print("S0=" ); print_location(st, uc->uc_mcontext.__gregs[23]); + st->cr(); + st->print("S1=" ); print_location(st, uc->uc_mcontext.__gregs[24]); + st->print("S2=" ); print_location(st, uc->uc_mcontext.__gregs[25]); + st->print("S3=" ); 
print_location(st, uc->uc_mcontext.__gregs[26]); + st->print("S4=" ); print_location(st, uc->uc_mcontext.__gregs[27]); + st->cr(); + st->print("S5=" ); print_location(st, uc->uc_mcontext.__gregs[28]); + st->print("S6=" ); print_location(st, uc->uc_mcontext.__gregs[29]); + st->print("S7=" ); print_location(st, uc->uc_mcontext.__gregs[30]); + st->print("S8=" ); print_location(st, uc->uc_mcontext.__gregs[31]); + st->cr(); + +} +void os::print_context(outputStream *st, void *context) { + + ucontext_t *uc = (ucontext_t*)context; + st->print_cr("Registers:"); + st->print( "ZERO=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); + st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); + st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); + st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); + st->cr(); + st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); + st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); + st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); + st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); + st->cr(); + st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); + st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); + st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); + st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); + st->cr(); + st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); + st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); + st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); + st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); + st->cr(); + st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); + st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); + st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); + st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); + st->cr(); + st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); + st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); + st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); + st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); + st->cr(); + st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); + st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); + st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); + st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); + st->cr(); + st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); + st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); + st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); + st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); + st->cr(); + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); + //print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t)); + print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. 
+ address pc = os::Linux::ucontext_get_pc(uc); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); + Disassembler::decode(pc - 80, pc + 80, st); +} + +void os::setup_fpu() { + // no use for LA +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif + +bool os::is_ActiveCoresMP() { + return UseActiveCoresMP && _initial_active_processor_count == 1; +} diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp new file mode 100644 index 00000000000..a7321ae0253 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/os_linux_loongarch.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP + + static void setup_fpu(); + static bool is_allocatable(size_t bytes); + static intptr_t *get_previous_fp(); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations + static bool register_code_area(char *low, char *high) { return true; } + + static bool is_ActiveCoresMP(); + +#endif // OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp new file mode 100644 index 00000000000..a1cedcd8cf7 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/prefetch_linux_loongarch.inline.hpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP + + +inline void Prefetch::read (void *loc, intx interval) { +// According to previous and present SPECjbb2015 score, +// comment prefetch is better than if (interval >= 0) prefetch branch. +// So choose comment prefetch as the base line. +#if 0 + __asm__ __volatile__ ( + " preld 0, %[__loc] \n" + : + : [__loc] "m"( *((address)loc + interval) ) + : "memory" + ); +#endif +} + +inline void Prefetch::write(void *loc, intx interval) { +// Ditto +#if 0 + __asm__ __volatile__ ( + " preld 8, %[__loc] \n" + : + : [__loc] "m"( *((address)loc + interval) ) + : "memory" + ); +#endif +} + +#endif // OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp new file mode 100644 index 00000000000..be28a562a1e --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/threadLocalStorage.hpp" + +// Map stack pointer (%esp) to thread pointer for faster TLS access +// +// Here we use a flat table for better performance. Getting current thread +// is down to one memory access (read _sp_map[%esp>>12]) in generated code +// and two in runtime code (-fPIC code needs an extra load for _sp_map). +// +// This code assumes stack page is not shared by different threads. It works +// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters). +// +// Notice that _sp_map is allocated in the bss segment, which is ZFOD +// (zero-fill-on-demand). 
While it reserves 4M address space upfront, +// actual memory pages are committed on demand. +// +// If an application creates and destroys a lot of threads, usually the +// stack space freed by a thread will soon get reused by new thread +// (this is especially true in NPTL or LinuxThreads in fixed-stack mode). +// No memory page in _sp_map is wasted. +// +// However, it's still possible that we might end up populating & +// committing a large fraction of the 4M table over time, but the actual +// amount of live data in the table could be quite small. The max wastage +// is less than 4M bytes. If it becomes an issue, we could use madvise() +// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map. +// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the +// physical memory page (i.e. similar to MADV_FREE on Solaris). + +#ifdef MINIMIZE_RAM_USAGE +Thread* ThreadLocalStorage::_sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; +#endif // MINIMIZE_RAM_USAGE + +void ThreadLocalStorage::generate_code_for_get_thread() { + // nothing we can do here for user-level thread +} + +void ThreadLocalStorage::pd_init() { +#ifdef MINIMIZE_RAM_USAGE + assert(align_size_down(os::vm_page_size(), PAGE_SIZE) == os::vm_page_size(), + "page size must be multiple of PAGE_SIZE"); +#endif // MINIMIZE_RAM_USAGE +} + +void ThreadLocalStorage::pd_set_thread(Thread* thread) { + os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); +#ifdef MINIMIZE_RAM_USAGE + address stack_top = os::current_stack_base(); + size_t stack_size = os::current_stack_size(); + + for (address p = stack_top - stack_size; p < stack_top; p += PAGE_SIZE) { + int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); + assert(thread == NULL || _sp_map[index] == NULL || thread == _sp_map[index], + "thread exited without detaching from VM??"); + _sp_map[index] = thread; + } +#endif // MINIMIZE_RAM_USAGE +} diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp new file mode 100644 index 00000000000..4fab788a75d --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/threadLS_linux_loongarch.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP + +#ifdef MINIMIZE_RAM_USAGE + // Processor dependent parts of ThreadLocalStorage + //only the low 2G space for user program in Linux + + #define SP_BITLENGTH 34 + #define PAGE_SHIFT 14 + #define PAGE_SIZE (1UL << PAGE_SHIFT) + + static Thread* _sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; + static int _sp_map_low; + static int _sp_map_high; +#endif // MINIMIZE_RAM_USAGE + +public: +#ifdef MINIMIZE_RAM_USAGE + static Thread** sp_map_addr() { return _sp_map; } +#endif // MINIMIZE_RAM_USAGE + + static Thread* thread() { +#ifdef MINIMIZE_RAM_USAGE + /* Thread::thread() can also be optimized in the same way as __get_thread() */ + //return (Thread*) os::thread_local_storage_at(thread_index()); + uintptr_t sp; + uintptr_t mask = (1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1; + + __asm__ __volatile__ ("addi.d %0, $r29, 0 " : "=r" (sp)); + + return _sp_map[(sp >> PAGE_SHIFT) & mask]; +#else + return (Thread*) os::thread_local_storage_at(thread_index()); +#endif // MINIMIZE_RAM_USAGE + } +#endif // OS_CPU_LINUX_LOONGARCH_VM_THREADLS_LINUX_LOONGARCH_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp new file mode 100644 index 00000000000..44f666d61f3 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/sharedRuntime.hpp" + +void JavaThread::pd_initialize() +{ + _anchor.clear(); +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, + void* ucontext, bool isInJava) { + + assert(Thread::current() == this, "caller must be current thread"); + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + assert(this->is_Java_thread(), "must be JavaThread"); + JavaThread* jt = (JavaThread *)this; + + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. + if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { + *fr_addr = jt->pd_last_frame(); + return true; + } + + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. + if (isInJava) { + ucontext_t* uc = (ucontext_t*) ucontext; + + intptr_t* ret_fp; + intptr_t* ret_sp; + ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, + &ret_sp, &ret_fp); + if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(jt)) { +#ifdef COMPILER2 + // C2 uses ebp as a general register see if NULL fp helps + frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(jt)) { + // nothing else to try if the frame isn't good + return false; + } + ret_frame = ret_frame2; +#else + // nothing else to try if the frame isn't good + return false; +#endif /* COMPILER2 */ + } + *fr_addr = ret_frame; + return true; + } + + // nothing else to try + return false; +} + +void JavaThread::cache_global_variables() { } + diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp new file mode 100644 index 00000000000..d6dd2521f42 --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/thread_linux_loongarch.hpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP + + private: + void pd_initialize(); + + frame pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + if (_anchor.last_Java_pc() != NULL) { + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); + } else { + // This will pick up pc from sp + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); + } + } + + + public: + // Mutators are highly dangerous.... + intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } + void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + + intptr_t* base_of_stack_pointer() { + return NULL; + } + void record_base_of_stack_pointer() { + } + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); +public: + + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). + static bool register_stack_overflow() { return false; } + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + +#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp b/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp new file mode 100644 index 00000000000..0097cadcb7a --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/vmStructs_linux_loongarch.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP +#define OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, pid_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(pid_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP diff --git a/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp b/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp new file mode 100644 index 00000000000..80a1538de9f --- /dev/null +++ b/hotspot/src/os_cpu/linux_loongarch/vm/vm_version_linux_loongarch.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/os.hpp" +#include "vm_version_loongarch.hpp" + diff --git a/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp new file mode 100644 index 00000000000..4ba53d9341d --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/assembler_linux_mips.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "runtime/os.hpp" +#include "runtime/threadLocalStorage.hpp" + +#define A0 RA0 +#define A1 RA1 +#define A2 RA2 +#define A3 RA3 +#define A4 RA4 +#define A5 RA5 +#define A6 RA6 +#define A7 RA7 +#define T0 RT0 +#define T1 RT1 +#define T2 RT2 +#define T3 RT3 +#define T8 RT8 +#define T9 RT9 + +void MacroAssembler::get_thread(Register thread) { +#ifdef MINIMIZE_RAM_USAGE +// +// In MIPS64, we don't use full 64-bit address space. +// Only a small range is actually used. +// +// Example: +// $ cat /proc/13352/maps +// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java +// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java +// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] +// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so +// 555d598000-555d59c000 rw-p 00000000 00:00 0 +// ...... +// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 +// 558b23c000-558b248000 ---p 00000000 00:00 0 +// 558b248000-558b28c000 rwxp 00000000 00:00 0 +// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] +// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] +// +// All stacks are positioned at 0x55________. +// Therefore, we can utilize the same algorithm used in 32-bit. + // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); + // Thread* thread = _sp_map[index]; + Register tmp; + + if (thread == AT) + tmp = T9; + else + tmp = AT; + + move(thread, SP); + shr(thread, PAGE_SHIFT); + + push(tmp); + li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); + andr(thread, thread, tmp); + shl(thread, Address::times_ptr); // sizeof(Thread *) + li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); + addu(tmp, tmp, thread); + ld_ptr(thread, tmp, 0); + pop(tmp); +#else + if (thread != V0) { + push(V0); + } + pushad_except_v0(); + + move(A0, ThreadLocalStorage::thread_index()); + push(S5); + move(S5, SP); + move(AT, -StackAlignmentInBytes); + andr(SP, SP, AT); + call(CAST_FROM_FN_PTR(address, pthread_getspecific)); + delayed()->nop(); + move(SP, S5); + pop(S5); + + popad_except_v0(); + if (thread != V0) { + move(thread, V0); + pop(V0); + } +#endif // MINIMIZE_RAM_USAGE +} diff --git a/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp new file mode 100644 index 00000000000..1c7ad605e95 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/atomic_linux_mips.inline.hpp @@ -0,0 +1,258 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP +#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP + +#include "orderAccess_linux_mips.inline.hpp" +#include "runtime/atomic.hpp" +#include "runtime/os.hpp" +#include "vm_version_mips.hpp" + +// Implementation of class atomic + +inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } + +inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } +inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } +inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } +inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } +inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } +inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void**)dest = store_value; } + +inline jlong Atomic::load (volatile jlong* src) { return *src; } + +///////////implementation of Atomic::add*///////////////// +inline jint Atomic::add (jint add_value, volatile jint* dest) { + jint __ret, __tmp; + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1: sync \n\t" + " ll %[__ret], %[__dest] \n\t" + " addu %[__tmp], %[__val], %[__ret] \n\t" + " sc %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + " nop \n\t" + + " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (add_value) + : "memory" + ); + + return add_value + __ret; +} + +inline intptr_t Atomic::add_ptr (intptr_t add_value, volatile intptr_t* dest) { + jint __ret, __tmp; + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1: sync \n\t" + " lld %[__ret], %[__dest] \n\t" + " daddu %[__tmp], %[__val], %[__ret] \n\t" + " scd %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + " nop \n\t" + + " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(volatile jint*)dest), 
[__val] "r" (add_value) + : "memory" + ); + + return add_value + __ret; +} + +inline void* Atomic::add_ptr (intptr_t add_value, volatile void* dest) { + return (void*)add_ptr((intptr_t)add_value, (volatile intptr_t*)dest); +} + +///////////implementation of Atomic::inc*///////////////// +inline void Atomic::inc (volatile jint* dest) { (void)add(1, dest); } +inline void Atomic::inc_ptr (volatile intptr_t* dest) { (void)add_ptr(1, dest); } +inline void Atomic::inc_ptr (volatile void* dest) { (void)inc_ptr((volatile intptr_t*)dest); } + +///////////implementation of Atomic::dec*///////////////// +inline void Atomic::dec (volatile jint* dest) { (void)add(-1, dest); } +inline void Atomic::dec_ptr (volatile intptr_t* dest) { (void)add_ptr(-1, dest); } +inline void Atomic::dec_ptr (volatile void* dest) { (void)dec_ptr((volatile intptr_t*)dest); } + + +///////////implementation of Atomic::xchg*///////////////// +inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) { + jint __ret, __tmp; + + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1: sync\n\t" + " ll %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" + " sc %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + " nop \n\t" + + " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) + : "memory" + ); + + return __ret; +} + +inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) { + intptr_t __ret, __tmp; + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1: sync\n\t" + " lld %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" + " scd %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" + " nop \n\t" + + " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) + : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) + : "memory" + ); + return __ret; +} + +inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) { + return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest); +} + +///////////implementation of Atomic::cmpxchg*///////////////// +inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value) { + jint __prev, __cmp; + + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1:sync \n\t" + " ll %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $0 \n\t" + " move %[__cmp], %[__new] \n\t" + " sc %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + " nop \n\t" + "2: \n\t" + " sync \n\t" + + " .set pop\n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + + return __prev; +} + +inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value) { + jlong __prev, __cmp; + + __asm__ __volatile__ ( + " .set push\n\t" + " .set mips64\n\t" + " .set noreorder\n\t" + + "1:sync \n\t" + " lld %[__prev], %[__dest] \n\t" + " bne %[__prev], %[__old], 2f \n\t" + " move %[__cmp], $0 \n\t" + " move %[__cmp], %[__new] \n\t" + " scd %[__cmp], %[__dest] \n\t" + " beqz %[__cmp], 1b \n\t" + " nop \n\t" + "2: \n\t" + " sync \n\t" + + " .set pop\n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) + : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + 
return __prev;
+}
+
+inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) {
+ intptr_t __prev, __cmp;
+ __asm__ __volatile__ (
+ " .set push \n\t"
+ " .set mips64\n\t\t"
+ " .set noreorder\n\t"
+
+ "1:sync \n\t"
+ " lld %[__prev], %[__dest] \n\t"
+ " bne %[__prev], %[__old], 2f \n\t"
+ " move %[__cmp], $0 \n\t"
+ " move %[__cmp], %[__new] \n\t"
+ " scd %[__cmp], %[__dest] \n\t"
+ " beqz %[__cmp], 1b \n\t"
+ " nop \n\t"
+ "2: \n\t"
+ " sync \n\t"
+ " .set pop \n\t"
+
+ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp)
+ : [__dest] "m" (*(volatile intptr_t*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value)
+ : "memory"
+ );
+
+ return __prev;
+}
+
+inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) {
+ return (void*)cmpxchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest, (intptr_t)compare_value);
+}
+
+#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_INLINE_HPP
diff --git a/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp
new file mode 100644
index 00000000000..5b5cd10aa55
--- /dev/null
+++ b/hotspot/src/os_cpu/linux_mips/vm/bytes_linux_mips.inline.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP
+#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP
+
+#include <byteswap.h>
+
+// Efficient swapping of data bytes from Java byte
+// ordering to native byte ordering and vice versa.
+inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); }
+inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); }
+inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); }
+
+#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP
diff --git a/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp
new file mode 100644
index 00000000000..73ac34501bc
--- /dev/null
+++ b/hotspot/src/os_cpu/linux_mips/vm/copy_linux_mips.inline.hpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP + +static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_words(from, to, count); +} + +static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { + pd_conjoint_bytes(from, to, count); +} + +static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { + copy_conjoint_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); + copy_conjoint_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void 
pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { + //assert(!UseCompressedOops, "foo!"); + assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + +#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp new file mode 100644 index 00000000000..f1599ac5f17 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/globals_linux_mips.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); +#ifdef MIPS64 +define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default +define_pd_global(intx, VMThreadStackSize, 1024); +#else +// ThreadStackSize 320 allows a couple of test cases to run while +// keeping the number of threads that can be created high. System +// default ThreadStackSize appears to be 512 which is too big. +define_pd_global(intx, ThreadStackSize, 320); +define_pd_global(intx, VMThreadStackSize, 512); +#endif // MIPS64 + +define_pd_global(intx, CompilerThreadStackSize, 0); + +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx,HeapBaseMinAddress, 2*G); + +#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad new file mode 100644 index 00000000000..5e38996ffa3 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.ad @@ -0,0 +1,153 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// mips32/godson2 Linux Architecture Description File + +//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- +// This block specifies the encoding classes used by the compiler to +// output byte streams. Encoding classes generate functions which are +// called by Machine Instruction Nodes in order to generate the bit +// encoding of the instruction. Operands specify their base encoding +// interface with the interface keyword. There are currently +// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & +// COND_INTER. REG_INTER causes an operand to generate a function +// which returns its register number when queried. CONST_INTER causes +// an operand to generate a function which returns the value of the +// constant when queried. MEMORY_INTER causes an operand to generate +// four functions which return the Base Register, the Index Register, +// the Scale Value, and the Offset Value of the operand when queried. +// COND_INTER causes an operand to generate six functions which return +// the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional +// instruction. Instructions specify two basic values for encoding. +// They use the ins_encode keyword to specify their encoding class +// (which must be one of the class names specified in the encoding +// block), and they use the opcode keyword to specify, in order, their +// primary, secondary, and tertiary opcode. Only the opcode sections +// which a particular instruction needs for encoding need to be +// specified. +encode %{ + // Build emit functions for each basic byte or larger field in the intel + // encoding scheme (opcode, rm, sib, immediate), and call them from C++ + // code in the enc_class source block. Emit functions will live in the + // main source block for now. 
In future, we can generalize this by + // adding a syntax that specifies the sizes of fields in an order, + // so that the adlc can build the emit functions automagically + + enc_class linux_breakpoint + %{ + MacroAssembler* masm = new MacroAssembler(&cbuf); + masm->call(CAST_FROM_FN_PTR(address, os::breakpoint), relocInfo::runtime_call_type); + %} + + enc_class call_epilog + %{ + if (VerifyStackAtCalls) { + // Check that stack depth is unchanged: find majik cookie on stack + int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP,-2)); + if(framesize >= 128) { + emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood + emit_d8(cbuf,0xBC); + emit_d8(cbuf,0x24); + emit_d32(cbuf,framesize); // Find majik cookie from ESP + emit_d32(cbuf, 0xbadb100d); + } + else { + emit_opcode(cbuf, 0x81); // cmp [esp+0],0xbadb1ood + emit_d8(cbuf,0x7C); + emit_d8(cbuf,0x24); + emit_d8(cbuf,framesize); // Find majik cookie from ESP + emit_d32(cbuf, 0xbadb100d); + } + // jmp EQ around INT3 + // QQQ TODO + const int jump_around = 5; // size of call to breakpoint, 1 for CC + emit_opcode(cbuf, 0x74); + emit_d8(cbuf, jump_around); + // QQQ temporary + emit_break(cbuf); + // Die if stack mismatch + // emit_opcode(cbuf,0xCC); + } + %} + +%} + +// INSTRUCTIONS -- Platform dependent + +//----------OS and Locking Instructions---------------------------------------- + +// This name is KNOWN by the ADLC and cannot be changed. +// The ADLC forces a 'TypeRawPtr::BOTTOM' output type +// for this guy. +instruct tlsLoadP(eAXRegP dst, eFlagsReg cr) %{ +%{ + match(Set dst (ThreadLocal)); + effect(DEF dst, KILL cr); + + format %{ "MOV EAX, Thread::current()" %} + ins_encode( linux_tlsencode(dst) ); + ins_pipe( ialu_reg_fat ); +%} + +// Die now +instruct ShouldNotReachHere() +%{ + match(Halt); + + // Use the following format syntax + format %{ "int3\t# ShouldNotReachHere" %} + // QQQ TODO for now call breakpoint + // opcode(0xCC); + // ins_encode(Opc); + ins_encode(linux_breakpoint); + ins_pipe(pipe_slow); +%} + + +// Platform dependent source + +source +%{ +// emit an interrupt that is caught by the debugger +void emit_break(CodeBuffer& cbuf) { + // Debugger doesn't really catch this but best we can do so far QQQ +#define __ masm. + __ lui(T9, Assembler::split_high((int)os::breakpoint)); + __ addiu(T9, T9, Assembler::split_low((int)os::breakpoint)); + __ jalr(T9); + __ delayed()->nop(); +} + +void MachBreakpointNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + emit_break(cbuf); +} + +uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { + //return 5; + return 16; +} + +%} diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s new file mode 100644 index 00000000000..f87fbf265d7 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips.s @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2017, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# + + diff --git a/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad b/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad new file mode 100644 index 00000000000..ca4d094738b --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/linux_mips_64.ad @@ -0,0 +1,50 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// AMD64 Linux Architecture Description File + +//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- +// This block specifies the encoding classes used by the compiler to +// output byte streams. Encoding classes generate functions which are +// called by Machine Instruction Nodes in order to generate the bit +// encoding of the instruction. Operands specify their base encoding +// interface with the interface keyword. There are currently +// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & +// COND_INTER. REG_INTER causes an operand to generate a function +// which returns its register number when queried. CONST_INTER causes +// an operand to generate a function which returns the value of the +// constant when queried. MEMORY_INTER causes an operand to generate +// four functions which return the Base Register, the Index Register, +// the Scale Value, and the Offset Value of the operand when queried. +// COND_INTER causes an operand to generate six functions which return +// the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional +// instruction. Instructions specify two basic values for encoding. +// They use the ins_encode keyword to specify their encoding class +// (which must be one of the class names specified in the encoding +// block), and they use the opcode keyword to specify, in order, their +// primary, secondary, and tertiary opcode. Only the opcode sections +// which a particular instruction needs for encoding need to be +// specified. 
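The encoding-block description above is easiest to follow next to a concrete pairing of an encoding class and an instruction that selects it. The fragment below is only an illustrative sketch, not part of the port: the names sketch_breakpoint and sketchHalt are hypothetical, and the emission call simply reuses the MacroAssembler::call(..., relocInfo::runtime_call_type) form already used by the linux_breakpoint encoding class in linux_mips.ad above.

    encode %{
      // Hypothetical encoding class: emit a runtime call to os::breakpoint(),
      // mirroring the linux_breakpoint class shown earlier in linux_mips.ad.
      enc_class sketch_breakpoint
      %{
        // enc_class bodies are C++ fragments run at code-emission time; they
        // append instruction bytes to the current CodeBuffer (cbuf).
        MacroAssembler* masm = new MacroAssembler(&cbuf);
        masm->call(CAST_FROM_FN_PTR(address, os::breakpoint), relocInfo::runtime_call_type);
      %}
    %}

    // An instruction names its encoding class via ins_encode; the ADLC then
    // generates the emit function that the Mach node calls, as the comment
    // block above describes.
    instruct sketchHalt()
    %{
      match(Halt);
      format %{ "breakpoint\t# sketch only" %}
      ins_encode(sketch_breakpoint);
      ins_pipe(pipe_slow);
    %}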
diff --git a/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp new file mode 100644 index 00000000000..c9bc169aa5c --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/orderAccess_linux_mips.inline.hpp @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP +#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP + +#include "runtime/atomic.hpp" +#include "runtime/orderAccess.hpp" +#include "runtime/os.hpp" +#include "vm_version_mips.hpp" + +#define inlasm_sync() if (os::is_ActiveCoresMP()) \ + __asm__ __volatile__ ("nop" : : : "memory"); \ + else \ + __asm__ __volatile__ ("sync" : : : "memory"); + +inline void OrderAccess::loadload() { inlasm_sync(); } +inline void OrderAccess::storestore() { inlasm_sync(); } +inline void OrderAccess::loadstore() { inlasm_sync(); } +inline void OrderAccess::storeload() { inlasm_sync(); } + +inline void OrderAccess::acquire() { inlasm_sync(); } +inline void OrderAccess::release() { inlasm_sync(); } +inline void OrderAccess::fence() { inlasm_sync(); } + +//implementation of load_acquire +inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; } +inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; } +inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; } +inline jlong OrderAccess::load_acquire(volatile jlong* p) { jlong tmp = *p; acquire(); return tmp; } +inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte data = *p; acquire(); return data; } +inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort data = *p; acquire(); return data; } +inline juint OrderAccess::load_acquire(volatile juint* p) { juint data = *p; acquire(); return data; } +inline julong OrderAccess::load_acquire(volatile julong* p) { julong tmp = *p; acquire(); return tmp; } +inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat data = *p; acquire(); return data; } +inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { jdouble tmp = *p; acquire(); return tmp; } + +//implementation of load_ptr_acquire +inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { intptr_t data = *p; acquire(); return data; } +inline void* 
OrderAccess::load_ptr_acquire(volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } +inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { void *data = *(void* volatile *)p; acquire(); return data; } + +//implementation of release_store +inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jlong* p, jlong v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jushort* p, jushort v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile juint* p, juint v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile julong* p, julong v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { release(); *p = v; } +inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release(); *p = v; } + +//implementation of release_store_ptr +inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release(); *p = v; } +inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { release(); *(void* volatile *)p = v; } + +//implementation of store_fence +inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } +inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } +inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } +inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } + +//implementation of store_ptr_fence +inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } +inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } + +//implementation of release_store_fence +inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } +inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } +inline void 
OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } + +//implementaion of release_store_ptr_fence +inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } +inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } + +#undef inlasm_sync + +#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp new file mode 100644 index 00000000000..43487dab98a --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.cpp @@ -0,0 +1,1015 @@ +/* + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +// no precompiled headers +#include "asm/macroAssembler.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "jvm_linux.h" +#include "memory/allocation.inline.hpp" +#include "mutex_linux.inline.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm.h" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" +#include "utilities/debug.hpp" +#include "compiler/disassembler.hpp" +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +#define REG_SP 29 +#define REG_FP 30 + +address os::current_stack_pointer() { + register void *sp __asm__ ("$29"); + return (address) sp; +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + + return (char*) -1; +} + +void os::initialize_thread(Thread* thr) { +// Nothing to do. +} + +address os::Linux::ucontext_get_pc(ucontext_t * uc) { + //return (address)uc->uc_mcontext.gregs[REG_PC]; + return (address)uc->uc_mcontext.pc; +} + +intptr_t* os::Linux::ucontext_get_sp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; +} + +intptr_t* os::Linux::ucontext_get_fp(ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread +// is currently interrupted by SIGPROF. +// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal +// frames. Currently we don't do that on Linux, so it's the same as +// os::fetch_frame_from_context(). 
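+// On linux_mips this simply asserts on its arguments and delegates to
+// os::fetch_frame_from_context() below; the SIGPROF path in
+// thread_linux_mips.cpp (JavaThread::pd_get_top_frame) uses it to recover
+// sp/fp/pc from the interrupted thread's ucontext.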
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, + ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + + assert(thread != NULL, "just checking"); + assert(ret_sp != NULL, "just checking"); + assert(ret_fp != NULL, "just checking"); + + return os::fetch_frame_from_context(uc, ret_sp, ret_fp); +} + +ExtendedPC os::fetch_frame_from_context(void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + ExtendedPC epc; + ucontext_t* uc = (ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); + if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); + } else { + // construct empty ExtendedPC for return value checking + epc = ExtendedPC(NULL); + if (ret_sp) *ret_sp = (intptr_t *)NULL; + if (ret_fp) *ret_fp = (intptr_t *)NULL; + } + + return epc; +} + +frame os::fetch_frame_from_context(void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); + return frame(sp, fp, epc.pc()); +} + +// By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get +// turned off by -fomit-frame-pointer, +frame os::get_sender_for_C_frame(frame* fr) { + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); +} + +//intptr_t* _get_previous_fp() { +intptr_t* __attribute__((noinline)) os::get_previous_fp() { + int *pc; + intptr_t sp; + int *pc_limit = (int*)(void*)&os::get_previous_fp; + int insn; + + { + l_pc:; + pc = (int*)&&l_pc; + __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); + } + + do { + insn = *pc; + switch(bitfield(insn, 16, 16)) { + case 0x27bd: /* addiu $sp,$sp,-i */ + case 0x67bd: /* daddiu $sp,$sp,-i */ + assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); + sp -= (short)bitfield(insn, 0, 16); + return (intptr_t*)sp; + } + --pc; + } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. + + ShouldNotReachHere(); + return NULL; // mute compiler +} + + +frame os::current_frame() { + intptr_t* fp = (intptr_t*)get_previous_fp(); + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); + if (os::is_first_C_frame(&myframe)) { + // stack is not walkable + return frame(); + } else { + return os::get_sender_for_C_frame(&myframe); + } +} + +//x86 add 2 new assemble function here! +extern "C" JNIEXPORT int +JVM_handle_linux_signal(int sig, + siginfo_t* info, + void* ucVoid, + int abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", + info->si_signo, + info->si_code, + info->si_errno, + info->si_addr); +#endif + + ucontext_t* uc = (ucontext_t*) ucVoid; + + Thread* t = ThreadLocalStorage::get_thread_slow(); + + SignalHandlerMark shm(t); + + // Note: it's not uncommon that JNI code uses signal/sigset to install + // then restore certain signal handler (e.g. to temporarily block SIGPIPE, + // or have a SIGILL handler when detecting CPU type). When that happens, + // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To + // avoid unnecessary crash when libjsig is not preloaded, try handle signals + // that do not require siginfo/ucontext first. 
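+  // Rough triage order implemented below (see the individual branches):
+  //  1. SIGPIPE is swallowed (needs no siginfo/ucontext).
+  //  2. SIGSEGV inside the thread stack -> yellow/red zone handling, or
+  //     manual stack expansion for MAP_GROWSDOWN stacks.
+  //  3. Faults while _thread_in_Java -> forward to the matching stub:
+  //     zombie-method SIGILL, polling-page SIGSEGV, SIGBUS from truncated
+  //     MappedByteBuffers, SIGFPE/trap for division by zero, implicit null
+  //     checks, and SIGILL emulation of paired-single FP instructions.
+  //  4. SIGILL raised by VM_Version feature detection, SIGBUS during unsafe
+  //     access in the VM, JNI fast-field slow cases, the memory-serialize
+  //     page, and execution-protection unguarding.
+  //  5. Otherwise: signal chaining, then report_and_die().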
+ + if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { + // allow chained handler to go first + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } else { + if (PrintMiscellaneous && (WizardMode || Verbose)) { + warning("Ignoring SIGPIPE - see bug 4229104"); + } + return true; + } + } + + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { + if (t != NULL ){ + if(t->is_Java_thread()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a java thread"); +#endif + thread = (JavaThread*)t; + } + else if(t->is_VM_thread()){ +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("this thread is a VM thread\n"); +#endif + vmthread = (VMThread *)t; + } + } + } + + // decide if this trap can be handled by a stub + address stub = NULL; + address pc = NULL; + + pc = (address) os::Linux::ucontext_get_pc(uc); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("pc=%lx", pc); + os::print_context(tty, uc); +#endif + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Linux::ucontext_get_pc(uc); + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + address addr = (address) info->si_addr; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("handle all stack overflow variations: "); + /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", + addr, + thread->stack_base(), + thread->stack_base() - thread->stack_size()); + */ +#endif + + // check if fault address is within thread stack + if (addr < thread->stack_base() && + addr >= thread->stack_base() - thread->stack_size()) { + // stack overflow +#ifdef PRINT_SIGNAL_HANDLE + tty->print("stack exception check \n"); +#endif + if (thread->in_stack_yellow_zone(addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in yellow zone\n"); +#endif + thread->disable_stack_yellow_zone(); + if (thread->thread_state() == _thread_in_Java) { + // Throw a stack overflow exception. Guard pages will be reenabled + // while unwinding the stack. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in java\n"); +#endif + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); + } else { + // Thread was in the vm or native code. Return and try to finish. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("this thread is in vm or native codes and return\n"); +#endif + return 1; + } + } else if (thread->in_stack_red_zone(addr)) { + // Fatal red zone violation. Disable the guard pages and fall through + // to handle_unexpected_exception way down below. +#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is in red zone\n"); +#endif + thread->disable_stack_red_zone(); + tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + + // This is a likely cause, but hard to verify. Let's just print + // it as a hint. + tty->print_raw_cr("Please check if any of your loaded .so files has " + "enabled executable stack (see man page execstack(8))"); + } else { + // Accessing stack address below sp may cause SEGV if current + // thread has MAP_GROWSDOWN stack. This should only happen when + // current thread was created by user code with MAP_GROWSDOWN flag + // and then attached to VM. See notes in os_linux.cpp. 
+#ifdef PRINT_SIGNAL_HANDLE + tty->print("exception addr is neither in yellow zone nor in the red one\n"); +#endif + if (thread->osthread()->expanding_stack() == 0) { + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); + } else { + fatal("recursive segv. expanding stack."); + } + } + } //addr < + } //sig == SIGSEGV + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub +#ifdef PRINT_SIGNAL_HANDLE + tty->print("java thread running in java code\n"); +#endif + + // Handle signal from NativeJump::patch_verified_entry(). + if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); +#endif + stub = SharedRuntime::get_handle_wrong_method_stub(); + } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); +#endif + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault + // here if the underlying file has been truncated. + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + nmethod* nm = cb->is_nmethod() ? (nmethod*)cb : NULL; +#ifdef PRINT_SIGNAL_HANDLE + tty->print("cb = %lx, nm = %lx\n", cb, nm); +#endif + if (nm != NULL && nm->has_unsafe_access()) { + stub = StubRoutines::handler_for_unsafe_access(); + } + } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { + // HACK: si_code does not work on linux 2.2.12-20!!! + int op = pc[0] & 0x3f; + int op1 = pc[3] & 0x3f; + //FIXME, Must port to mips code!! + switch (op) { + case 0x1e: //ddiv + case 0x1f: //ddivu + case 0x1a: //div + case 0x1b: //divu + case 0x34: //trap + /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. + * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() + */ + stub = SharedRuntime::continuation_for_implicit_exception(thread, + pc, + SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); + break; + default: + // TODO: handle more cases if we are using other x86 instructions + // that can generate SIGFPE signal on linux. + tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); + //fatal("please update this code."); + } + } else if (sig == SIGSEGV && + !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("continuation for implicit exception\n"); +#endif + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); +#endif + } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { + //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. + //The method is to trigger kernel emulation of float emulation. 
+ int inst = *(int*)pc; + int ops = (inst >> 26) & 0x3f; + int ops_fmt = (inst >> 21) & 0x1f; + int op = inst & 0x3f; + if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { + int ft, fs, fd; + ft = (inst >> 16) & 0x1f; + fs = (inst >> 11) & 0x1f; + fd = (inst >> 6) & 0x1f; + float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; + double ft_value, fs_value, fd_value; + ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; + fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; + __asm__ __volatile__ ( + "cvt.s.pl %0, %4\n\t" + "cvt.s.pu %1, %4\n\t" + "cvt.s.pl %2, %5\n\t" + "cvt.s.pu %3, %5\n\t" + : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) + : "f" (fs_value), "f" (ft_value) + ); + + switch (op) { + case Assembler::fadd_op: + __asm__ __volatile__ ( + "add.s %1, %3, %5\n\t" + "add.s %2, %4, %6\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) + : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + case Assembler::fsub_op: + //fd = fs - ft + __asm__ __volatile__ ( + "sub.s %1, %3, %5\n\t" + "sub.s %2, %4, %6\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) + : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + case Assembler::fmul_op: + __asm__ __volatile__ ( + "mul.s %1, %3, %5\n\t" + "mul.s %2, %4, %6\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) + : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + default: + tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); + } + } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { + // madd.ps is not used, the code below were not tested + int fr, ft, fs, fd; + float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; + double fr_value, ft_value, fs_value, fd_value; + switch (op) { + case Assembler::madd_ps_op: + // fd = (fs * ft) + fr + fr = (inst >> 21) & 0x1f; + ft = (inst >> 16) & 0x1f; + fs = (inst >> 11) & 0x1f; + fd = (inst >> 6) & 0x1f; + fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; + ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; + fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; + __asm__ __volatile__ ( + "cvt.s.pu %3, %9\n\t" + "cvt.s.pl %4, %9\n\t" + "cvt.s.pu %5, %10\n\t" + "cvt.s.pl %6, %10\n\t" + "cvt.s.pu %7, %11\n\t" + "cvt.s.pl %8, %11\n\t" + "madd.s %1, %3, %5, %7\n\t" + "madd.s %2, %4, %6, %8\n\t" + "pll.ps %0, %1, %2\n\t" + : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) + : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ + ); + uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; + stub = pc + 4; + break; + default: + tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); + } + } + } //SIGILL + } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { + // thread->thread_state() != _thread_in_Java + // SIGILL must be caused by VM_Version::determine_features(). + VM_Version::set_supports_cpucfg(false); + stub = pc + 4; // continue with next instruction. 
+ } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("SIGBUS in vm thread \n"); +#endif + stub = StubRoutines::handler_for_unsafe_access(); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("jni fast get trap: "); +#endif + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; + } +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("addr = %d, stub = %lx", addr, stub); +#endif + } + + // Check to see if we caught the safepoint code in the + // process of write protecting the memory serialization page. + // It write enables the page immediately after protecting it + // so we can just return to retry the write. + if ((sig == SIGSEGV) && + os::is_memory_serialize_page(thread, (address) info->si_addr)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print("write protecting the memory serialiazation page\n"); +#endif + // Block current thread until the memory serialize page permission restored. + os::block_on_serialize_page_trap(); + return true; + } + } + + // Execution protection violation + // + // This should be kept as the last step in the triage. We don't + // have a dedicated trap number for a no-execute fault, so be + // conservative and allow other handlers the first shot. + // + // Note: We don't test that info->si_code == SEGV_ACCERR here. + // this si_code is so generic that it is almost meaningless; and + // the si_code for this condition may change in the future. + // Furthermore, a false-positive should be harmless. + if (UnguardOnExecutionViolation > 0 && + //(sig == SIGSEGV || sig == SIGBUS) && + //uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { + (sig == SIGSEGV || sig == SIGBUS +#ifdef OPT_RANGECHECK + || sig == SIGSYS +#endif + ) && + //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { + (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("execution protection violation\n"); +#endif + + int page_size = os::vm_page_size(); + address addr = (address) info->si_addr; + address pc = os::Linux::ucontext_get_pc(uc); + // Make sure the pc and the faulting address are sane. + // + // If an instruction spans a page boundary, and the page containing + // the beginning of the instruction is executable but the following + // page is not, the pc and the faulting address might be slightly + // different - we still want to unguard the 2nd page in this case. + // + // 15 bytes seems to be a (very) safe value for max instruction size. 
+ bool pc_is_near_addr = + (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); + bool instr_spans_page_boundary = + (align_size_down((intptr_t) pc ^ (intptr_t) addr, + (intptr_t) page_size) > 0); + + if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { + static volatile address last_addr = + (address) os::non_memory_address_word(); + + // In conservative mode, don't unguard unless the address is in the VM + if (addr != last_addr && + (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { + + // Set memory to RWX and retry + address page_start = + (address) align_size_down((intptr_t) addr, (intptr_t) page_size); + bool res = os::protect_memory((char*) page_start, page_size, + os::MEM_PROT_RWX); + + if (PrintMiscellaneous && Verbose) { + char buf[256]; + jio_snprintf(buf, sizeof(buf), "Execution protection violation " + "at " INTPTR_FORMAT + ", unguarding " INTPTR_FORMAT ": %s, errno=%d", addr, + page_start, (res ? "success" : "failed"), errno); + tty->print_raw_cr(buf); + } + stub = pc; + + // Set last_addr so if we fault again at the same address, we don't end + // up in an endless loop. + // + // There are two potential complications here. Two threads trapping at + // the same address at the same time could cause one of the threads to + // think it already unguarded, and abort the VM. Likely very rare. + // + // The other race involves two threads alternately trapping at + // different addresses and failing to unguard the page, resulting in + // an endless loop. This condition is probably even more unlikely than + // the first. + // + // Although both cases could be avoided by using locks or thread local + // last_addr, these solutions are unnecessary complication: this + // handler is a best-effort safety net, not a complete solution. It is + // disabled by default and should only be used as a workaround in case + // we missed any no-execute-unsafe VM code. + + last_addr = addr; + } + } + } + + if (stub != NULL) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("resolved stub=%lx\n",stub); +#endif + // save all thread context in case we need to restore it + if (thread != NULL) thread->set_saved_exception_pc(pc); + + uc->uc_mcontext.pc = (greg_t)stub; + return true; + } + + // signal-chaining + if (os::Linux::chained_handler(sig, info, ucVoid)) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("signal chaining\n"); +#endif + return true; + } + + if (!abort_if_unrecognized) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("abort becauce of unrecognized\n"); +#endif + // caller wants another chance, so give it to him + return false; + } + + if (pc == NULL && uc != NULL) { + pc = os::Linux::ucontext_get_pc(uc); + } + + // unmask current signal + sigset_t newset; + sigemptyset(&newset); + sigaddset(&newset, sig); + sigprocmask(SIG_UNBLOCK, &newset, NULL); +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("VMError in signal handler\n"); +#endif + VMError err(t, sig, pc, info, ucVoid); + err.report_and_die(); + + ShouldNotReachHere(); + return true; // Mute compiler +} + +// FCSR:...|24| 23 |22|21|... +// ...|FS|FCC0|FO|FN|... +void os::Linux::init_thread_fpu_state(void) { + if (SetFSFOFN == 999) + return; + int fs = (SetFSFOFN / 100)? 1:0; + int fo = ((SetFSFOFN % 100) / 10)? 1:0; + int fn = (SetFSFOFN % 10)? 
1:0; + int mask = fs << 24 | fo << 22 | fn << 21; + + int fcsr = get_fpu_control_word(); + fcsr = fcsr | mask; + set_fpu_control_word(fcsr); + /* + if (fcsr != get_fpu_control_word()) + tty->print_cr(" fail to set to %lx, get_fpu_control_word:%lx", fcsr, get_fpu_control_word()); + */ +} + +int os::Linux::get_fpu_control_word(void) { + int fcsr; + __asm__ __volatile__ ( + ".set noat;" + "daddiu %0, $0, 0;" + "cfc1 %0, $31;" + : "=r" (fcsr) + ); + return fcsr; +} + +void os::Linux::set_fpu_control_word(int fpu_control) { + __asm__ __volatile__ ( + ".set noat;" + "ctc1 %0, $31;" + : + : "r" (fpu_control) + ); +} + +bool os::is_allocatable(size_t bytes) { + + if (bytes < 2 * G) { + return true; + } + + char* addr = reserve_memory(bytes, NULL); + + if (addr != NULL) { + release_memory(addr, bytes); + } + + return addr != NULL; +} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +size_t os::Linux::min_stack_allowed = 96 * K; + + +// Test if pthread library can support variable thread stack size. LinuxThreads +// in fixed stack mode allocates 2M fixed slot for each thread. LinuxThreads +// in floating stack mode and NPTL support variable stack size. +bool os::Linux::supports_variable_stack_size() { + if (os::Linux::is_NPTL()) { + // NPTL, yes + return true; + + } else { + // Note: We can't control default stack size when creating a thread. + // If we use non-default stack size (pthread_attr_setstacksize), both + // floating stack and non-floating stack LinuxThreads will return the + // same value. This makes it impossible to implement this function by + // detecting thread stack size directly. + // + // An alternative approach is to check %gs. Fixed-stack LinuxThreads + // do not use %gs, so its value is 0. Floating-stack LinuxThreads use + // %gs (either as LDT selector or GDT selector, depending on kernel) + // to access thread specific data. + // + // Note that %gs is a reserved glibc register since early 2001, so + // applications are not allowed to change its value (Ulrich Drepper from + // Redhat confirmed that all known offenders have been modified to use + // either %fs or TSD). In the worst case scenario, when VM is embedded in + // a native application that plays with %gs, we might see non-zero %gs + // even LinuxThreads is running in fixed stack mode. As the result, we'll + // return true and skip _thread_safety_check(), so we may not be able to + // detect stack-heap collisions. But otherwise it's harmless. + // + return false; + } +} + +// return default stack size for thr_type +size_t os::Linux::default_stack_size(os::ThreadType thr_type) { + // default stack size (compiler thread needs larger stack) + size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); + return s; +} + +size_t os::Linux::default_guard_size(os::ThreadType thr_type) { + // Creating guard page is very expensive. Java thread has HotSpot + // guard page, only enable glibc guard page for non-Java threads. + return (thr_type == java_thread ? 0 : page_size()); +} + +// Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ JavaThread created by VM does not have glibc +// | glibc guard page | - guard, attached Java thread usually has +// | |/ 1 page glibc guard. 
+// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | HotSpot Guard Pages | - red and yellow pages +// | |/ +// +------------------------+ JavaThread::stack_yellow_zone_base() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// Non-Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ +// | glibc guard page | - usually 1 page +// | |/ +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from +// pthread_attr_getstack() + +static void current_stack_region(address * bottom, size_t * size) { + if (os::is_primordial_thread()) { + // primordial thread needs special handling because pthread_getattr_np() + // may return bogus value. + *bottom = os::Linux::initial_thread_stack_bottom(); + *size = os::Linux::initial_thread_stack_size(); + } else { + pthread_attr_t attr; + + int rslt = pthread_getattr_np(pthread_self(), &attr); + + // JVM needs to know exact stack location, abort if it fails + if (rslt != 0) { + if (rslt == ENOMEM) { + vm_exit_out_of_memory(0, OOM_MMAP_ERROR, "pthread_getattr_np"); + } else { + fatal(err_msg("pthread_getattr_np failed with errno = %d", rslt)); + } + } + + if (pthread_attr_getstack(&attr, (void **)bottom, size) != 0) { + fatal("Can not locate current stack attributes!"); + } + + pthread_attr_destroy(&attr); + + } + assert(os::current_stack_pointer() >= *bottom && + os::current_stack_pointer() < *bottom + *size, "just checking"); +} + +address os::current_stack_base() { + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return (bottom + size); +} + +size_t os::current_stack_size() { + // stack size includes normal stack and HotSpot guard pages + address bottom; + size_t size; + current_stack_region(&bottom, &size); + return size; +} + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler +void os::print_register_info(outputStream *st, void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + // this is horrendously verbose but the layout of the registers in the + // // context does not match how we defined our abstract Register set, so + // // we can't just iterate through the gregs area + // + // // this is only for the "general purpose" registers + st->print("R0=" ); print_location(st, uc->uc_mcontext.gregs[0]); + st->print("AT=" ); print_location(st, uc->uc_mcontext.gregs[1]); + st->print("V0=" ); print_location(st, uc->uc_mcontext.gregs[2]); + st->print("V1=" ); print_location(st, uc->uc_mcontext.gregs[3]); + st->cr(); + st->print("A0=" ); print_location(st, uc->uc_mcontext.gregs[4]); + st->print("A1=" ); print_location(st, uc->uc_mcontext.gregs[5]); + st->print("A2=" ); print_location(st, uc->uc_mcontext.gregs[6]); + st->print("A3=" ); print_location(st, uc->uc_mcontext.gregs[7]); + st->cr(); + st->print("A4=" ); print_location(st, uc->uc_mcontext.gregs[8]); + st->print("A5=" ); print_location(st, uc->uc_mcontext.gregs[9]); + st->print("A6=" ); print_location(st, uc->uc_mcontext.gregs[10]); + st->print("A7=" ); print_location(st, uc->uc_mcontext.gregs[11]); + st->cr(); + st->print("T0=" ); print_location(st, uc->uc_mcontext.gregs[12]); + 
st->print("T1=" ); print_location(st, uc->uc_mcontext.gregs[13]); + st->print("T2=" ); print_location(st, uc->uc_mcontext.gregs[14]); + st->print("T3=" ); print_location(st, uc->uc_mcontext.gregs[15]); + st->cr(); + st->print("S0=" ); print_location(st, uc->uc_mcontext.gregs[16]); + st->print("S1=" ); print_location(st, uc->uc_mcontext.gregs[17]); + st->print("S2=" ); print_location(st, uc->uc_mcontext.gregs[18]); + st->print("S3=" ); print_location(st, uc->uc_mcontext.gregs[19]); + st->cr(); + st->print("S4=" ); print_location(st, uc->uc_mcontext.gregs[20]); + st->print("S5=" ); print_location(st, uc->uc_mcontext.gregs[21]); + st->print("S6=" ); print_location(st, uc->uc_mcontext.gregs[22]); + st->print("S7=" ); print_location(st, uc->uc_mcontext.gregs[23]); + st->cr(); + st->print("T8=" ); print_location(st, uc->uc_mcontext.gregs[24]); + st->print("T9=" ); print_location(st, uc->uc_mcontext.gregs[25]); + st->print("K0=" ); print_location(st, uc->uc_mcontext.gregs[26]); + st->print("K1=" ); print_location(st, uc->uc_mcontext.gregs[27]); + st->cr(); + st->print("GP=" ); print_location(st, uc->uc_mcontext.gregs[28]); + st->print("SP=" ); print_location(st, uc->uc_mcontext.gregs[29]); + st->print("FP=" ); print_location(st, uc->uc_mcontext.gregs[30]); + st->print("RA=" ); print_location(st, uc->uc_mcontext.gregs[31]); + st->cr(); + +} +void os::print_context(outputStream *st, void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + st->print_cr("Registers:"); + st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); + st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); + st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); + st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); + st->cr(); + st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); + st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); + st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); + st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); + st->cr(); + st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); + st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); + st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); + st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); + st->cr(); + st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); + st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); + st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); + st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); + st->cr(); + st->print( "S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); + st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); + st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); + st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); + st->cr(); + st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); + st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); + st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); + st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); + st->cr(); + st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); + st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); + st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); + 
st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); + st->cr(); + st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); + st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); + st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); + st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); + st->cr(); + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); + //print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t)); + print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. + address pc = os::Linux::ucontext_get_pc(uc); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); + Disassembler::decode(pc - 80, pc + 80, st); +} + +void os::setup_fpu() { + /* + //no use for MIPS + int fcsr; + address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); + __asm__ __volatile__ ( + ".set noat;" + "cfc1 %0, $31;" + "sw %0, 0(%1);" + : "=r" (fcsr) + : "r" (fpu_cntrl) + : "memory" + ); + printf("fpu_cntrl: %lx\n", fpu_cntrl); + */ +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif + +bool os::is_ActiveCoresMP() { + return UseActiveCoresMP && _initial_active_processor_count == 1; +} diff --git a/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp new file mode 100644 index 00000000000..c07d08156f2 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/os_linux_mips.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP + + static void setup_fpu(); + static bool is_allocatable(size_t bytes); + static intptr_t *get_previous_fp(); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations + static bool register_code_area(char *low, char *high) { return true; } + + static bool is_ActiveCoresMP(); + +#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp b/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp new file mode 100644 index 00000000000..93490345f0b --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/prefetch_linux_mips.inline.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP + + +inline void Prefetch::read (void *loc, intx interval) { + // 'pref' is implemented as NOP in Loongson 3A + __asm__ __volatile__ ( + " .set push\n" + " .set mips32\n" + " .set noreorder\n" + " pref 0, 0(%[__loc]) \n" + " .set pop\n" + : [__loc] "=&r"(loc) + : + : "memory" + ); +} + +inline void Prefetch::write(void *loc, intx interval) { + __asm__ __volatile__ ( + " .set push\n" + " .set mips32\n" + " .set noreorder\n" + " pref 1, 0(%[__loc]) \n" + " .set pop\n" + : [__loc] "=&r"(loc) + : + : "memory" + ); + +} + +#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp new file mode 100644 index 00000000000..be28a562a1e --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/threadLocalStorage.hpp" + +// Map stack pointer (%esp) to thread pointer for faster TLS access +// +// Here we use a flat table for better performance. Getting current thread +// is down to one memory access (read _sp_map[%esp>>12]) in generated code +// and two in runtime code (-fPIC code needs an extra load for _sp_map). +// +// This code assumes stack page is not shared by different threads. It works +// in 32-bit VM when page size is 4K (or a multiple of 4K, if that matters). +// +// Notice that _sp_map is allocated in the bss segment, which is ZFOD +// (zero-fill-on-demand). While it reserves 4M address space upfront, +// actual memory pages are committed on demand. +// +// If an application creates and destroys a lot of threads, usually the +// stack space freed by a thread will soon get reused by new thread +// (this is especially true in NPTL or LinuxThreads in fixed-stack mode). +// No memory page in _sp_map is wasted. +// +// However, it's still possible that we might end up populating & +// committing a large fraction of the 4M table over time, but the actual +// amount of live data in the table could be quite small. The max wastage +// is less than 4M bytes. If it becomes an issue, we could use madvise() +// with MADV_DONTNEED to reclaim unused (i.e. all-zero) pages in _sp_map. +// MADV_DONTNEED on Linux keeps the virtual memory mapping, but zaps the +// physical memory page (i.e. similar to MADV_FREE on Solaris). + +#ifdef MINIMIZE_RAM_USAGE +Thread* ThreadLocalStorage::_sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; +#endif // MINIMIZE_RAM_USAGE + +void ThreadLocalStorage::generate_code_for_get_thread() { + // nothing we can do here for user-level thread +} + +void ThreadLocalStorage::pd_init() { +#ifdef MINIMIZE_RAM_USAGE + assert(align_size_down(os::vm_page_size(), PAGE_SIZE) == os::vm_page_size(), + "page size must be multiple of PAGE_SIZE"); +#endif // MINIMIZE_RAM_USAGE +} + +void ThreadLocalStorage::pd_set_thread(Thread* thread) { + os::thread_local_storage_at_put(ThreadLocalStorage::thread_index(), thread); +#ifdef MINIMIZE_RAM_USAGE + address stack_top = os::current_stack_base(); + size_t stack_size = os::current_stack_size(); + + for (address p = stack_top - stack_size; p < stack_top; p += PAGE_SIZE) { + int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); + assert(thread == NULL || _sp_map[index] == NULL || thread == _sp_map[index], + "thread exited without detaching from VM??"); + _sp_map[index] = thread; + } +#endif // MINIMIZE_RAM_USAGE +} diff --git a/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp new file mode 100644 index 00000000000..e595195e213 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/threadLS_linux_mips.hpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP + +#ifdef MINIMIZE_RAM_USAGE + // Processor dependent parts of ThreadLocalStorage + //only the low 2G space for user program in Linux + + #define SP_BITLENGTH 34 + #define PAGE_SHIFT 14 + #define PAGE_SIZE (1UL << PAGE_SHIFT) + + static Thread* _sp_map[1UL << (SP_BITLENGTH - PAGE_SHIFT)]; + static int _sp_map_low; + static int _sp_map_high; +#endif // MINIMIZE_RAM_USAGE + +public: +#ifdef MINIMIZE_RAM_USAGE + static Thread** sp_map_addr() { return _sp_map; } +#endif // MINIMIZE_RAM_USAGE + + static Thread* thread() { +#ifdef MINIMIZE_RAM_USAGE + /* Thread::thread() can also be optimized in the same way as __get_thread() */ + //return (Thread*) os::thread_local_storage_at(thread_index()); + uintptr_t sp; + uintptr_t mask = (1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1; + + __asm__ __volatile__ ("daddiu %0, $29, 0 " : "=r" (sp)); + + return _sp_map[(sp >> PAGE_SHIFT) & mask]; +#else + return (Thread*) os::thread_local_storage_at(thread_index()); +#endif // MINIMIZE_RAM_USAGE + } +#endif // OS_CPU_LINUX_MIPS_VM_THREADLS_LINUX_MIPS_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp new file mode 100644 index 00000000000..44f666d61f3 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.cpp @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/sharedRuntime.hpp" + +void JavaThread::pd_initialize() +{ + _anchor.clear(); +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, + void* ucontext, bool isInJava) { + + assert(Thread::current() == this, "caller must be current thread"); + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + assert(this->is_Java_thread(), "must be JavaThread"); + JavaThread* jt = (JavaThread *)this; + + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. It should be more reliable than ucontext info. + if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { + *fr_addr = jt->pd_last_frame(); + return true; + } + + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. + if (isInJava) { + ucontext_t* uc = (ucontext_t*) ucontext; + + intptr_t* ret_fp; + intptr_t* ret_sp; + ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, + &ret_sp, &ret_fp); + if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(jt)) { +#ifdef COMPILER2 + // C2 uses ebp as a general register see if NULL fp helps + frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(jt)) { + // nothing else to try if the frame isn't good + return false; + } + ret_frame = ret_frame2; +#else + // nothing else to try if the frame isn't good + return false; +#endif /* COMPILER2 */ + } + *fr_addr = ret_frame; + return true; + } + + // nothing else to try + return false; +} + +void JavaThread::cache_global_variables() { } + diff --git a/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp new file mode 100644 index 00000000000..cb11c36ae50 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/thread_linux_mips.hpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP + + private: + void pd_initialize(); + + frame pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + if (_anchor.last_Java_pc() != NULL) { + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); + } else { + // This will pick up pc from sp + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); + } + } + + + public: + // Mutators are highly dangerous.... + intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } + void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + + intptr_t* base_of_stack_pointer() { + return NULL; + } + void record_base_of_stack_pointer() { + } + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); +public: + + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). + static bool register_stack_overflow() { return false; } + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + +#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp b/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp new file mode 100644 index 00000000000..b7454bf045a --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/vmStructs_linux_mips.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, pid_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(pid_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP diff --git a/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp b/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp new file mode 100644 index 00000000000..ce697823b99 --- /dev/null +++ b/hotspot/src/os_cpu/linux_mips/vm/vm_version_linux_mips.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
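VM_STRUCTS_OS_CPU and VM_TYPES_OS_CPU above follow the X-macro pattern used throughout vmStructs: the Serviceability Agent hands in one describer macro per kind of entry, and the list expands into one invocation per field or type. A small self-contained sketch of the pattern, with hypothetical names rather than the real HotSpot describer signatures:

#include <cstddef>
#include <cstdio>

struct OSThread { int _thread_id; unsigned long _pthread_id; };

// The list names the fields once; callers decide what each entry expands to.
#define MY_STRUCTS(nonstatic_field)      \
  nonstatic_field(OSThread, _thread_id)  \
  nonstatic_field(OSThread, _pthread_id)

// One possible expansion: print the byte offset of every listed field.
#define PRINT_OFFSET(klass, field) \
  std::printf(#klass "::" #field " at offset %zu\n", offsetof(klass, field));

int main() {
  MY_STRUCTS(PRINT_OFFSET)
  return 0;
}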
+ * + */ + +#include "precompiled.hpp" +#include "runtime/os.hpp" +#include "vm_version_mips.hpp" diff --git a/hotspot/src/share/tools/hsdis/Makefile b/hotspot/src/share/tools/hsdis/Makefile index 0d1b608944f..a9754ce2acf 100644 --- a/hotspot/src/share/tools/hsdis/Makefile +++ b/hotspot/src/share/tools/hsdis/Makefile @@ -105,12 +105,25 @@ CFLAGS/sparc += -m32 endif CFLAGS += $(CFLAGS/$(ARCH)) CFLAGS += -fPIC +ifeq ($(ARCH), mips64) +CPUINFO = $(shell cat /proc/cpuinfo) +ifneq ($(findstring Loongson,$(CPUINFO)),) +CFLAGS += -DLOONGSON +endif +endif OS = linux LIB_EXT = .so CC = gcc endif CFLAGS += -O DLDFLAGS += -shared +ifeq ($(ARCH), mips64) +DLDFLAGS += -Wl,-z,noexecstack +endif +ifeq ($(ARCH), loongarch64) +DLDFLAGS += -Wl,-z,noexecstack +CONFIGURE_ARGS += --disable-werror +endif LDFLAGS += -ldl OUTFLAGS += -o $@ else diff --git a/hotspot/src/share/tools/hsdis/hsdis.c b/hotspot/src/share/tools/hsdis/hsdis.c index 4fb49648704..f6ef5bea151 100644 --- a/hotspot/src/share/tools/hsdis/hsdis.c +++ b/hotspot/src/share/tools/hsdis/hsdis.c @@ -493,6 +493,16 @@ static const char* native_arch_name() { #if defined(LIBARCH_ppc64) || defined(LIBARCH_ppc64le) res = "powerpc:common64"; #endif +#ifdef LIBARCH_mips64 +#ifdef LOONGSON + res = "mips:loongson_3a"; +#else + res = "mips:isa64"; +#endif +#endif +#ifdef LIBARCH_loongarch64 + res = "loongarch"; +#endif #ifdef LIBARCH_aarch64 res = "aarch64"; #endif diff --git a/hotspot/src/share/vm/adlc/main.cpp b/hotspot/src/share/vm/adlc/main.cpp index 52044f12d40..50c585872ea 100644 --- a/hotspot/src/share/vm/adlc/main.cpp +++ b/hotspot/src/share/vm/adlc/main.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + // MAIN.CPP - Entry point for the Architecture Description Language Compiler #include "adlc.hpp" @@ -234,6 +240,14 @@ int main(int argc, char *argv[]) AD.addInclude(AD._CPP_file, "nativeInst_x86.hpp"); AD.addInclude(AD._CPP_file, "vmreg_x86.inline.hpp"); #endif +#ifdef TARGET_ARCH_mips + AD.addInclude(AD._CPP_file, "nativeInst_mips.hpp"); + AD.addInclude(AD._CPP_file, "vmreg_mips.inline.hpp"); +#endif +#ifdef TARGET_ARCH_loongarch + AD.addInclude(AD._CPP_file, "nativeInst_loongarch.hpp"); + AD.addInclude(AD._CPP_file, "vmreg_loongarch.inline.hpp"); +#endif #ifdef TARGET_ARCH_aarch64 AD.addInclude(AD._CPP_file, "assembler_aarch64.inline.hpp"); AD.addInclude(AD._CPP_file, "nativeInst_aarch64.hpp"); diff --git a/hotspot/src/share/vm/asm/assembler.hpp b/hotspot/src/share/vm/asm/assembler.hpp index f7f1ae1d367..572aa997cac 100644 --- a/hotspot/src/share/vm/asm/assembler.hpp +++ b/hotspot/src/share/vm/asm/assembler.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_ASM_ASSEMBLER_HPP #define SHARE_VM_ASM_ASSEMBLER_HPP @@ -53,6 +59,14 @@ # include "register_ppc.hpp" # include "vm_version_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "register_mips.hpp" +# include "vm_version_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "register_loongarch.hpp" +# include "vm_version_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "register_aarch64.hpp" # include "vm_version_aarch64.hpp" @@ -468,6 +482,12 @@ class AbstractAssembler : public ResourceObj { #ifdef TARGET_ARCH_ppc # include "assembler_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "assembler_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "assembler_loongarch.hpp" +#endif #endif // SHARE_VM_ASM_ASSEMBLER_HPP diff --git a/hotspot/src/share/vm/asm/assembler.inline.hpp b/hotspot/src/share/vm/asm/assembler.inline.hpp index 1a48cb3171d..8ac90e14740 100644 --- a/hotspot/src/share/vm/asm/assembler.inline.hpp +++ b/hotspot/src/share/vm/asm/assembler.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_ASM_ASSEMBLER_INLINE_HPP #define SHARE_VM_ASM_ASSEMBLER_INLINE_HPP @@ -42,6 +48,12 @@ #ifdef TARGET_ARCH_ppc # include "assembler_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "assembler_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "assembler_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "assembler_aarch64.inline.hpp" #endif diff --git a/hotspot/src/share/vm/asm/codeBuffer.cpp b/hotspot/src/share/vm/asm/codeBuffer.cpp index d94ac406555..f6b578111f3 100644 --- a/hotspot/src/share/vm/asm/codeBuffer.cpp +++ b/hotspot/src/share/vm/asm/codeBuffer.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2023. These + * modifications are Copyright (c) 2015, 2023, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "asm/codeBuffer.hpp" #include "compiler/disassembler.hpp" @@ -323,6 +329,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) assert(rtype == relocInfo::none || rtype == relocInfo::runtime_call_type || rtype == relocInfo::internal_word_type|| + NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) rtype == relocInfo::section_word_type || rtype == relocInfo::external_word_type, "code needs relocation information"); diff --git a/hotspot/src/share/vm/asm/codeBuffer.hpp b/hotspot/src/share/vm/asm/codeBuffer.hpp index 02b619ad77f..c04560a0bc1 100644 --- a/hotspot/src/share/vm/asm/codeBuffer.hpp +++ b/hotspot/src/share/vm/asm/codeBuffer.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_ASM_CODEBUFFER_HPP #define SHARE_VM_ASM_CODEBUFFER_HPP @@ -635,6 +641,12 @@ class CodeBuffer: public StackObj { #ifdef TARGET_ARCH_ppc # include "codeBuffer_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "codeBuffer_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "codeBuffer_loongarch.hpp" +#endif }; diff --git a/hotspot/src/share/vm/asm/macroAssembler.hpp b/hotspot/src/share/vm/asm/macroAssembler.hpp index 1482eb630b1..0be415b6c50 100644 --- a/hotspot/src/share/vm/asm/macroAssembler.hpp +++ b/hotspot/src/share/vm/asm/macroAssembler.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_ASM_MACROASSEMBLER_HPP #define SHARE_VM_ASM_MACROASSEMBLER_HPP @@ -45,5 +51,10 @@ #ifdef TARGET_ARCH_aarch64 # include "macroAssembler_aarch64.hpp" #endif - +#ifdef TARGET_ARCH_mips +# include "macroAssembler_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "macroAssembler_loongarch.hpp" +#endif #endif // SHARE_VM_ASM_MACROASSEMBLER_HPP diff --git a/hotspot/src/share/vm/asm/macroAssembler.inline.hpp b/hotspot/src/share/vm/asm/macroAssembler.inline.hpp index db3daa52e9a..6f4e523c595 100644 --- a/hotspot/src/share/vm/asm/macroAssembler.inline.hpp +++ b/hotspot/src/share/vm/asm/macroAssembler.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP #define SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP @@ -42,6 +48,12 @@ #ifdef TARGET_ARCH_ppc # include "macroAssembler_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "macroAssembler_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "macroAssembler_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "macroAssembler_aarch64.inline.hpp" #endif diff --git a/hotspot/src/share/vm/asm/register.hpp b/hotspot/src/share/vm/asm/register.hpp index c500890181a..6a20929e590 100644 --- a/hotspot/src/share/vm/asm/register.hpp +++ b/hotspot/src/share/vm/asm/register.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_ASM_REGISTER_HPP #define SHARE_VM_ASM_REGISTER_HPP @@ -108,6 +114,12 @@ const type name = ((type)name##_##type##EnumValue) #ifdef TARGET_ARCH_ppc # include "register_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "register_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "register_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "register_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_Defs.hpp b/hotspot/src/share/vm/c1/c1_Defs.hpp index b0cd7637399..b42b9de1b55 100644 --- a/hotspot/src/share/vm/c1/c1_Defs.hpp +++ b/hotspot/src/share/vm/c1/c1_Defs.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_C1_C1_DEFS_HPP #define SHARE_VM_C1_C1_DEFS_HPP @@ -29,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "register_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "register_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "register_aarch64.hpp" #endif @@ -56,6 +65,9 @@ enum { #ifdef TARGET_ARCH_x86 # include "c1_Defs_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_Defs_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_Defs_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp b/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp index f07e97a4d32..6bc367a8974 100644 --- a/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp +++ b/hotspot/src/share/vm/c1/c1_FpuStackSim.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_C1_C1_FPUSTACKSIM_HPP #define SHARE_VM_C1_C1_FPUSTACKSIM_HPP @@ -35,6 +41,9 @@ class FpuStackSim; #ifdef TARGET_ARCH_x86 # include "c1_FpuStackSim_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_FpuStackSim_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_FpuStackSim_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_FrameMap.cpp b/hotspot/src/share/vm/c1/c1_FrameMap.cpp index 1dac94d58cf..b1e37ec41c1 100644 --- a/hotspot/src/share/vm/c1/c1_FrameMap.cpp +++ b/hotspot/src/share/vm/c1/c1_FrameMap.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "c1/c1_FrameMap.hpp" #include "c1/c1_LIR.hpp" @@ -29,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "vmreg_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "vmreg_aarch64.inline.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_FrameMap.hpp b/hotspot/src/share/vm/c1/c1_FrameMap.hpp index 41571e3d168..c0e7b28ea47 100644 --- a/hotspot/src/share/vm/c1/c1_FrameMap.hpp +++ b/hotspot/src/share/vm/c1/c1_FrameMap.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_C1_C1_FRAMEMAP_HPP #define SHARE_VM_C1_C1_FRAMEMAP_HPP @@ -85,6 +91,9 @@ class FrameMap : public CompilationResourceObj { #ifdef TARGET_ARCH_x86 # include "c1_FrameMap_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_FrameMap_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_FrameMap_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_LIR.cpp b/hotspot/src/share/vm/c1/c1_LIR.cpp index fa37e7a046e..5d33d3f7a0f 100644 --- a/hotspot/src/share/vm/c1/c1_LIR.cpp +++ b/hotspot/src/share/vm/c1/c1_LIR.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "c1/c1_InstructionPrinter.hpp" #include "c1/c1_LIR.hpp" @@ -79,6 +85,17 @@ FloatRegister LIR_OprDesc::as_double_reg() const { #endif +#if defined(LOONGARCH64) + +FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + +FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +#endif LIR_Opr LIR_OprFact::illegalOpr = LIR_OprFact::illegal(); @@ -149,13 +166,19 @@ void LIR_Address::verify0() const { #endif #ifdef _LP64 assert(base()->is_cpu_register(), "wrong base operand"); -#ifndef AARCH64 +#if !defined(AARCH64) && !defined(LOONGARCH64) assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand"); #else assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); #endif +#ifdef LOONGARCH64 + assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || + base()->type() == T_LONG || base()->type() == T_METADATA, + "wrong type for addresses"); +#else assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, "wrong type for addresses"); +#endif #else assert(base()->is_single_cpu(), "wrong base operand"); assert(index()->is_illegal() || index()->is_single_cpu(), "wrong index operand"); @@ -258,8 +281,6 @@ bool LIR_OprDesc::is_oop() const { } } - - void LIR_Op2::verify() const { #ifdef ASSERT switch (code()) { @@ -301,6 +322,18 @@ void LIR_Op2::verify() const { #endif } +void LIR_Op4::verify() const { +#ifdef ASSERT + switch (code()) { + case lir_cmp_cmove: + break; + + default: + assert(!result_opr()->is_register() || !result_opr()->is_oop_register(), + "can't produce oops from arith"); + } +#endif +} LIR_OpBranch::LIR_OpBranch(LIR_Condition cond, BasicType type, BlockBegin* block) : LIR_Op(lir_branch, LIR_OprFact::illegalOpr, (CodeEmitInfo*)NULL) @@ -358,6 +391,55 @@ void LIR_OpBranch::negate_cond() { } } +LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info) + : LIR_Op2(lir_cmp_branch, cond, left, right, info) + , _label(stub->entry()) + , _block(NULL) + , _ublock(NULL) + , _stub(stub) { +} + +LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info) + : LIR_Op2(lir_cmp_branch, cond, left, right, info) + , _label(block->label()) + , _block(block) + , _ublock(NULL) + , _stub(NULL) { +} + +LIR_OpCmpBranch::LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info) + : LIR_Op2(lir_cmp_float_branch, cond, left, right, info) + , _label(block->label()) + , _block(block) + , _ublock(ublock) + , _stub(NULL) { +} + +void LIR_OpCmpBranch::change_block(BlockBegin* b) { + assert(_block != NULL, "must have old block"); + assert(_block->label() == label(), "must be equal"); + + _block = b; + _label = b->label(); +} + +void LIR_OpCmpBranch::change_ublock(BlockBegin* b) { + assert(_ublock != NULL, "must have old block"); + + _ublock = b; +} + +void LIR_OpCmpBranch::negate_cond() { + switch (condition()) { + case lir_cond_equal: set_condition(lir_cond_notEqual); break; + case lir_cond_notEqual: set_condition(lir_cond_equal); break; + case lir_cond_less: set_condition(lir_cond_greaterEqual); break; + case lir_cond_lessEqual: set_condition(lir_cond_greater); break; + case lir_cond_greaterEqual: set_condition(lir_cond_less); break; + case lir_cond_greater: set_condition(lir_cond_lessEqual); 
break; + default: ShouldNotReachHere(); + } +} LIR_OpTypeCheck::LIR_OpTypeCheck(LIR_Code code, LIR_Opr result, LIR_Opr object, ciKlass* klass, LIR_Opr tmp1, LIR_Opr tmp2, LIR_Opr tmp3, @@ -560,10 +642,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) { assert(opConvert->_info == NULL, "must be"); if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); if (opConvert->_result->is_valid()) do_output(opConvert->_result); -#if defined(PPC) || defined(AARCH64) - if (opConvert->_tmp1->is_valid()) do_temp(opConvert->_tmp1); - if (opConvert->_tmp2->is_valid()) do_temp(opConvert->_tmp2); -#endif + if (opConvert->_tmp->is_valid()) do_temp(opConvert->_tmp); do_stub(opConvert->_stub); break; @@ -661,6 +740,25 @@ void LIR_OpVisitState::visit(LIR_Op* op) { break; } +// LIR_OpCmpBranch; + case lir_cmp_branch: // may have info, input and result register always invalid + case lir_cmp_float_branch: // may have info, input and result register always invalid + { + assert(op->as_OpCmpBranch() != NULL, "must be"); + LIR_OpCmpBranch* opCmpBranch = (LIR_OpCmpBranch*)op; + assert(opCmpBranch->_tmp2->is_illegal() && opCmpBranch->_tmp3->is_illegal() && + opCmpBranch->_tmp4->is_illegal() && opCmpBranch->_tmp5->is_illegal(), "not used"); + + if (opCmpBranch->_info) do_info(opCmpBranch->_info); + if (opCmpBranch->_opr1->is_valid()) do_input(opCmpBranch->_opr1); + if (opCmpBranch->_opr2->is_valid()) do_input(opCmpBranch->_opr2); + if (opCmpBranch->_tmp1->is_valid()) do_temp(opCmpBranch->_tmp1); + if (opCmpBranch->_stub != NULL) opCmpBranch->stub()->visit(this); + assert(opCmpBranch->_result->is_illegal(), "not used"); + + break; + } + // special handling for cmove: right input operand must not be equal // to the result operand, otherwise the backend fails case lir_cmove: @@ -806,6 +904,29 @@ void LIR_OpVisitState::visit(LIR_Op* op) { break; } +// LIR_Op4 + // special handling for cmp cmove: src2(opr4) operand must not be equal + // to the result operand, otherwise the backend fails + case lir_cmp_cmove: + { + assert(op->as_Op4() != NULL, "must be"); + LIR_Op4* op4 = (LIR_Op4*)op; + + assert(op4->_info == NULL, "not used"); + assert(op4->_opr1->is_valid() && op4->_opr2->is_valid() && + op4->_opr3->is_valid() && op4->_opr4->is_valid() && + op4->_result->is_valid(), "used"); + + do_input(op4->_opr1); + do_input(op4->_opr2); + do_input(op4->_opr3); + do_input(op4->_opr4); + do_temp(op4->_opr4); + do_output(op4->_result); + + break; + } + // LIR_OpJavaCall case lir_static_call: @@ -1121,6 +1242,13 @@ void LIR_Op2::emit_code(LIR_Assembler* masm) { masm->emit_op2(this); } +void LIR_OpCmpBranch::emit_code(LIR_Assembler* masm) { + masm->emit_opCmpBranch(this); + if (stub()) { + masm->append_code_stub(stub()); + } +} + void LIR_OpAllocArray::emit_code(LIR_Assembler* masm) { masm->emit_alloc_array(this); masm->append_code_stub(stub()); @@ -1141,6 +1269,10 @@ void LIR_Op3::emit_code(LIR_Assembler* masm) { masm->emit_op3(this); } +void LIR_Op4::emit_code(LIR_Assembler* masm) { + masm->emit_op4(this); +} + void LIR_OpLock::emit_code(LIR_Assembler* masm) { masm->emit_lock(this); if (stub()) { @@ -1381,7 +1513,6 @@ void LIR_List::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int info)); } - void LIR_List::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* addr, CodeEmitInfo* info) { append(new LIR_Op2( lir_cmp, @@ -1391,6 +1522,17 @@ void LIR_List::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Address* ad info)); } +void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_null) { + 
if (deoptimize_on_null) { + // Emit an explicit null check and deoptimize if opr is null + CodeStub* deopt = new DeoptimizeStub(info); + cmp_branch(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL), T_OBJECT, deopt); + } else { + // Emit an implicit null check + append(new LIR_Op1(lir_null_check, opr, info)); + } +} + void LIR_List::allocate_object(LIR_Opr dst, LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr t4, int header_size, int object_size, LIR_Opr klass, bool init_check, CodeStub* stub) { append(new LIR_OpAllocObj( @@ -1520,18 +1662,6 @@ void LIR_List::store_check(LIR_Opr object, LIR_Opr array, LIR_Opr tmp1, LIR_Opr append(c); } -void LIR_List::null_check(LIR_Opr opr, CodeEmitInfo* info, bool deoptimize_on_null) { - if (deoptimize_on_null) { - // Emit an explicit null check and deoptimize if opr is null - CodeStub* deopt = new DeoptimizeStub(info); - cmp(lir_cond_equal, opr, LIR_OprFact::oopConst(NULL)); - branch(lir_cond_equal, T_OBJECT, deopt); - } else { - // Emit an implicit null check - append(new LIR_Op1(lir_null_check, opr, info)); - } -} - void LIR_List::cas_long(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, LIR_Opr t1, LIR_Opr t2, LIR_Opr result) { append(new LIR_OpCompareAndSwap(lir_cas_long, addr, cmp_value, new_value, t1, t2, result)); @@ -1780,6 +1910,8 @@ const char * LIR_Op::name() const { case lir_cmp_l2i: s = "cmp_l2i"; break; case lir_ucmp_fd2i: s = "ucomp_fd2i"; break; case lir_cmp_fd2i: s = "comp_fd2i"; break; + case lir_cmp_branch: s = "cmp_branch"; break; + case lir_cmp_float_branch: s = "cmp_fbranch"; break; case lir_cmove: s = "cmove"; break; case lir_add: s = "add"; break; case lir_sub: s = "sub"; break; @@ -1809,6 +1941,8 @@ const char * LIR_Op::name() const { // LIR_Op3 case lir_idiv: s = "idiv"; break; case lir_irem: s = "irem"; break; + // LIR_Op4 + case lir_cmp_cmove: s = "cmp_cmove"; break; // LIR_OpJavaCall case lir_static_call: s = "static"; break; case lir_optvirtual_call: s = "optvirtual"; break; @@ -1960,6 +2094,26 @@ void LIR_OpBranch::print_instr(outputStream* out) const { } } +// LIR_OpCmpBranch +void LIR_OpCmpBranch::print_instr(outputStream* out) const { + print_condition(out, condition()); out->print(" "); + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + if (block() != NULL) { + out->print("[B%d] ", block()->block_id()); + } else if (stub() != NULL) { + out->print("["); + stub()->print_name(out); + out->print(": " INTPTR_FORMAT "]", p2i(stub())); + if (stub()->info() != NULL) out->print(" [bci:%d]", stub()->info()->stack()->bci()); + } else { + out->print("[label:" INTPTR_FORMAT "] ", p2i(label())); + } + if (ublock() != NULL) { + out->print("unordered: [B%d] ", ublock()->block_id()); + } +} + void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { switch(cond) { case lir_cond_equal: out->print("[EQ]"); break; @@ -1980,12 +2134,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { print_bytecode(out, bytecode()); in_opr()->print(out); out->print(" "); result_opr()->print(out); out->print(" "); -#if defined(PPC) || defined(AARCH64) - if(tmp1()->is_valid()) { - tmp1()->print(out); out->print(" "); - tmp2()->print(out); out->print(" "); + if(tmp()->is_valid()) { + tmp()->print(out); out->print(" "); } -#endif } void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { @@ -2031,9 +2182,6 @@ void LIR_OpRoundFP::print_instr(outputStream* out) const { // LIR_Op2 void LIR_Op2::print_instr(outputStream* out) const { - if (code() == lir_cmove) { - print_condition(out, 
condition()); out->print(" "); - } in_opr1()->print(out); out->print(" "); in_opr2()->print(out); out->print(" "); if (tmp1_opr()->is_valid()) { tmp1_opr()->print(out); out->print(" "); } @@ -2082,6 +2230,18 @@ void LIR_Op3::print_instr(outputStream* out) const { result_opr()->print(out); } +// LIR_Op4 +void LIR_Op4::print_instr(outputStream* out) const { + if (code() == lir_cmp_cmove) { + print_condition(out, condition()); out->print(" "); + } + in_opr1()->print(out); out->print(" "); + in_opr2()->print(out); out->print(" "); + in_opr3()->print(out); out->print(" "); + in_opr4()->print(out); out->print(" "); + result_opr()->print(out); +} + void LIR_OpLock::print_instr(outputStream* out) const { hdr_opr()->print(out); out->print(" "); @@ -2095,10 +2255,14 @@ void LIR_OpLock::print_instr(outputStream* out) const { #ifdef ASSERT void LIR_OpAssert::print_instr(outputStream* out) const { + tty->print_cr("function LIR_OpAssert::print_instr unimplemented yet! "); + Unimplemented(); + /* print_condition(out, condition()); out->print(" "); in_opr1()->print(out); out->print(" "); in_opr2()->print(out); out->print(", \""); out->print("%s", msg()); out->print("\""); + */ } #endif diff --git a/hotspot/src/share/vm/c1/c1_LIR.hpp b/hotspot/src/share/vm/c1/c1_LIR.hpp index 24b86202111..aec77afe1f8 100644 --- a/hotspot/src/share/vm/c1/c1_LIR.hpp +++ b/hotspot/src/share/vm/c1/c1_LIR.hpp @@ -22,6 +22,11 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ #ifndef SHARE_VM_C1_C1_LIR_HPP #define SHARE_VM_C1_C1_LIR_HPP @@ -452,7 +457,7 @@ class LIR_OprDesc: public CompilationResourceObj { // for compatibility with RInfo int fpu () const { return lo_reg_half(); } #endif -#if defined(SPARC) || defined(ARM) || defined(PPC) || defined(AARCH64) +#if defined(SPARC) || defined(ARM) || defined(PPC) || defined(AARCH64) || defined(LOONGARCH) FloatRegister as_float_reg () const; FloatRegister as_double_reg () const; #endif @@ -542,7 +547,7 @@ class LIR_Address: public LIR_OprPtr { , _type(type) , _disp(0) { verify(); } -#if defined(X86) || defined(ARM) || defined(AARCH64) +#if defined(X86) || defined(ARM) || defined(AARCH64) || defined(LOONGARCH) LIR_Address(LIR_Opr base, LIR_Opr index, Scale scale, intx disp, BasicType type): _base(base) , _index(index) @@ -658,7 +663,13 @@ class LIR_OprFact: public AllStatic { LIR_OprDesc::double_type | LIR_OprDesc::cpu_register | LIR_OprDesc::double_size); } -#endif // PPC +#elif defined(LOONGARCH) + static LIR_Opr double_fpu(int reg) { return (LIR_Opr)(intptr_t)((reg << LIR_OprDesc::reg1_shift) | + (reg << LIR_OprDesc::reg2_shift) | + LIR_OprDesc::double_type | + LIR_OprDesc::fpu_register | + LIR_OprDesc::double_size); } +#endif // LOONGARCH static LIR_Opr virtual_register(int index, BasicType type) { LIR_Opr res; @@ -872,9 +883,11 @@ class LIR_OpConvert; class LIR_OpAllocObj; class LIR_OpRoundFP; class LIR_Op2; +class LIR_OpCmpBranch; class LIR_OpDelay; class LIR_Op3; class LIR_OpAllocArray; +class LIR_Op4; class LIR_OpCall; class LIR_OpJavaCall; class LIR_OpRTCall; @@ -943,6 +956,8 @@ enum LIR_Code { , lir_cmp_l2i , lir_ucmp_fd2i , lir_cmp_fd2i + , lir_cmp_branch + , lir_cmp_float_branch , lir_cmove , lir_add , lir_sub @@ -976,6 +991,9 @@ enum LIR_Code { , lir_idiv , lir_irem , end_op3 + , begin_op4 + , lir_cmp_cmove + , end_op4 , begin_opJavaCall , lir_static_call , lir_optvirtual_call @@ -1139,12 +1157,14 @@ 
class LIR_Op: public CompilationResourceObj { virtual LIR_OpAllocObj* as_OpAllocObj() { return NULL; } virtual LIR_OpRoundFP* as_OpRoundFP() { return NULL; } virtual LIR_OpBranch* as_OpBranch() { return NULL; } + virtual LIR_OpCmpBranch* as_OpCmpBranch() { return NULL; } virtual LIR_OpRTCall* as_OpRTCall() { return NULL; } virtual LIR_OpConvert* as_OpConvert() { return NULL; } virtual LIR_Op0* as_Op0() { return NULL; } virtual LIR_Op1* as_Op1() { return NULL; } virtual LIR_Op2* as_Op2() { return NULL; } virtual LIR_Op3* as_Op3() { return NULL; } + virtual LIR_Op4* as_Op4() { return NULL; } virtual LIR_OpArrayCopy* as_OpArrayCopy() { return NULL; } virtual LIR_OpUpdateCRC32* as_OpUpdateCRC32() { return NULL; } virtual LIR_OpTypeCheck* as_OpTypeCheck() { return NULL; } @@ -1474,37 +1494,18 @@ class LIR_OpConvert: public LIR_Op1 { private: Bytecodes::Code _bytecode; ConversionStub* _stub; -#if defined(PPC) || defined(AARCH64) - LIR_Opr _tmp1; - LIR_Opr _tmp2; -#endif + LIR_Opr _tmp; public: - LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) + LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub, LIR_Opr tmp) : LIR_Op1(lir_convert, opr, result) , _stub(stub) -#ifdef PPC - , _tmp1(LIR_OprDesc::illegalOpr()) - , _tmp2(LIR_OprDesc::illegalOpr()) -#endif + , _tmp(tmp) , _bytecode(code) {} -#if defined(PPC) || defined(AARCH64) - LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub - ,LIR_Opr tmp1, LIR_Opr tmp2) - : LIR_Op1(lir_convert, opr, result) - , _stub(stub) - , _tmp1(tmp1) - , _tmp2(tmp2) - , _bytecode(code) {} -#endif - Bytecodes::Code bytecode() const { return _bytecode; } ConversionStub* stub() const { return _stub; } -#if defined(PPC) || defined(AARCH64) - LIR_Opr tmp1() const { return _tmp1; } - LIR_Opr tmp2() const { return _tmp2; } -#endif + LIR_Opr tmp() const { return _tmp; } virtual void emit_code(LIR_Assembler* masm); virtual LIR_OpConvert* as_OpConvert() { return this; } @@ -1659,7 +1660,7 @@ class LIR_Op2: public LIR_Op { , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) { - assert(code == lir_cmp || code == lir_assert, "code check"); + assert(code == lir_cmp || code == lir_cmp_branch || code == lir_cmp_float_branch || code == lir_assert, "code check"); } LIR_Op2(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) @@ -1691,7 +1692,7 @@ class LIR_Op2: public LIR_Op { , _tmp3(LIR_OprFact::illegalOpr) , _tmp4(LIR_OprFact::illegalOpr) , _tmp5(LIR_OprFact::illegalOpr) { - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Op2(LIR_Code code, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, LIR_Opr tmp1, LIR_Opr tmp2 = LIR_OprFact::illegalOpr, @@ -1707,7 +1708,7 @@ class LIR_Op2: public LIR_Op { , _tmp3(tmp3) , _tmp4(tmp4) , _tmp5(tmp5) { - assert(code != lir_cmp && is_in_range(code, begin_op2, end_op2), "code check"); + assert((code != lir_cmp && code != lir_cmp_branch && code != lir_cmp_float_branch) && is_in_range(code, begin_op2, end_op2), "code check"); } LIR_Opr in_opr1() const { return _opr1; } @@ -1719,10 +1720,12 @@ class LIR_Op2: public LIR_Op { LIR_Opr tmp4_opr() const { return _tmp4; } LIR_Opr tmp5_opr() const { return _tmp5; } LIR_Condition condition() const { - assert(code() == lir_cmp || code() == 
lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); return _condition; + assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove || code() == lir_assert, "only valid for cmp and cmove and assert"); + return _condition; } void set_condition(LIR_Condition condition) { - assert(code() == lir_cmp || code() == lir_cmove, "only valid for cmp and cmove"); _condition = condition; + assert(code() == lir_cmp || code() == lir_cmp_branch || code() == lir_cmp_float_branch || code() == lir_cmove, "only valid for cmp and cmove"); + _condition = condition; } void set_fpu_stack_size(int size) { _fpu_stack_size = size; } @@ -1736,6 +1739,43 @@ class LIR_Op2: public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; +class LIR_OpCmpBranch: public LIR_Op2 { + friend class LIR_OpVisitState; + + private: + Label* _label; + BlockBegin* _block; // if this is a branch to a block, this is the block + BlockBegin* _ublock; // if this is a float-branch, this is the unorderd block + CodeStub* _stub; // if this is a branch to a stub, this is the stub + + public: + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, Label* lbl, CodeEmitInfo* info = NULL) + : LIR_Op2(lir_cmp_branch, cond, left, right, info) + , _label(lbl) + , _block(NULL) + , _ublock(NULL) + , _stub(NULL) { } + + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeStub* stub, CodeEmitInfo* info = NULL); + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, CodeEmitInfo* info = NULL); + + // for unordered comparisons + LIR_OpCmpBranch(LIR_Condition cond, LIR_Opr left, LIR_Opr right, BlockBegin* block, BlockBegin* ublock, CodeEmitInfo* info = NULL); + + Label* label() const { return _label; } + BlockBegin* block() const { return _block; } + BlockBegin* ublock() const { return _ublock; } + CodeStub* stub() const { return _stub; } + + void change_block(BlockBegin* b); + void change_ublock(BlockBegin* b); + void negate_cond(); + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_OpCmpBranch* as_OpCmpBranch() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + class LIR_OpAllocArray : public LIR_Op { friend class LIR_OpVisitState; @@ -1776,7 +1816,6 @@ class LIR_OpAllocArray : public LIR_Op { virtual void print_instr(outputStream* out) const PRODUCT_RETURN; }; - class LIR_Op3: public LIR_Op { friend class LIR_OpVisitState; @@ -1800,6 +1839,48 @@ class LIR_Op3: public LIR_Op { }; +class LIR_Op4: public LIR_Op { + friend class LIR_OpVisitState; + + private: + LIR_Opr _opr1; + LIR_Opr _opr2; + LIR_Opr _opr3; + LIR_Opr _opr4; + BasicType _type; + LIR_Condition _condition; + + void verify() const; + + public: + LIR_Op4(LIR_Code code, LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr opr3, LIR_Opr opr4, LIR_Opr result, BasicType type) + : LIR_Op(code, result, NULL) + , _opr1(opr1) + , _opr2(opr2) + , _opr3(opr3) + , _opr4(opr4) + , _type(type) + , _condition(condition) { + assert(is_in_range(code, begin_op4, end_op4), "code check"); + assert(type != T_ILLEGAL, "cmove should have type"); + } + LIR_Opr in_opr1() const { return _opr1; } + LIR_Opr in_opr2() const { return _opr2; } + LIR_Opr in_opr3() const { return _opr3; } + LIR_Opr in_opr4() const { return _opr4; } + BasicType type() const { return _type; } + LIR_Condition condition() const { + assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); return _condition; + } + void 
set_condition(LIR_Condition condition) { + assert(code() == lir_cmp_cmove, "only valid for cmp cmove"); _condition = condition; + } + + virtual void emit_code(LIR_Assembler* masm); + virtual LIR_Op4* as_Op4() { return this; } + virtual void print_instr(outputStream* out) const PRODUCT_RETURN; +}; + //-------------------------------- class LabelObj: public CompilationResourceObj { private: @@ -2141,17 +2222,9 @@ class LIR_List: public CompilationResourceObj { void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } -#ifdef PPC - void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, LIR_Opr tmp1, LIR_Opr tmp2) { append(new LIR_OpConvert(code, left, dst, NULL, tmp1, tmp2)); } -#endif -#if defined(AARCH64) - void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, - ConversionStub* stub = NULL, LIR_Opr tmp1 = LIR_OprDesc::illegalOpr()) { - append(new LIR_OpConvert(code, left, dst, stub, tmp1, LIR_OprDesc::illegalOpr())); + void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL, LIR_Opr tmp = LIR_OprFact::illegalOpr) { + append(new LIR_OpConvert(code, left, dst, stub, tmp)); } -#else - void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } -#endif void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } @@ -2256,6 +2329,48 @@ class LIR_List: public CompilationResourceObj { append(new LIR_OpBranch(cond, type, block, unordered)); } +#if defined(X86) || defined(AARCH64) + + template + void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { + cmp(condition, left, right, info); + branch(condition, type, tgt); + } + + void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { + cmp(condition, left, right); + branch(condition, type, block, unordered); + } + + void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + cmp(condition, left, right); + cmove(condition, src1, src2, dst, type); + } + +#endif + +#ifdef LOONGARCH + + template + void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { + append(new LIR_OpCmpBranch(condition, left, right, tgt, info)); + } + + void cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { + append(new LIR_OpCmpBranch(condition, left, right, block, unordered)); + } + + void cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { + append(new LIR_Op4(lir_cmp_cmove, condition, left, right, src1, src2, dst, type)); + } + +#endif + + template + void cmp_branch(LIR_Condition condition, LIR_Opr left, int right, BasicType type, T tgt, CodeEmitInfo* info = NULL) { + cmp_branch(condition, left, LIR_OprFact::intConst(right), type, tgt, info); + } + void shift_left(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); void shift_right(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); void unsigned_shift_right(LIR_Opr value, LIR_Opr count, LIR_Opr dst, LIR_Opr tmp); diff --git 
a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp index e5cd19f17a7..a18c53008bd 100644 --- a/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "c1/c1_Compilation.hpp" #include "c1/c1_Instruction.hpp" @@ -34,6 +40,10 @@ # include "nativeInst_x86.hpp" # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +# include "vmreg_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "nativeInst_aarch64.hpp" # include "vmreg_aarch64.inline.hpp" @@ -811,6 +821,18 @@ void LIR_Assembler::emit_op2(LIR_Op2* op) { } +void LIR_Assembler::emit_op4(LIR_Op4* op) { + switch (op->code()) { + case lir_cmp_cmove: + cmp_cmove(op->condition(), op->in_opr1(), op->in_opr2(), op->in_opr3(), op->in_opr4(), op->result_opr(), op->type()); + break; + + default: + Unimplemented(); + break; + } +} + void LIR_Assembler::build_frame() { _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); } diff --git a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp index 1a68d458d23..ac0f4e7a462 100644 --- a/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp +++ b/hotspot/src/share/vm/c1/c1_LIRAssembler.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_C1_C1_LIRASSEMBLER_HPP #define SHARE_VM_C1_C1_LIRASSEMBLER_HPP @@ -195,7 +201,9 @@ class LIR_Assembler: public CompilationResourceObj { void emit_op1(LIR_Op1* op); void emit_op2(LIR_Op2* op); void emit_op3(LIR_Op3* op); + void emit_op4(LIR_Op4* op); void emit_opBranch(LIR_OpBranch* op); + void emit_opCmpBranch(LIR_OpCmpBranch* op); void emit_opLabel(LIR_OpLabel* op); void emit_arraycopy(LIR_OpArrayCopy* op); void emit_updatecrc32(LIR_OpUpdateCRC32* op); @@ -227,6 +235,7 @@ class LIR_Assembler: public CompilationResourceObj { void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); void comp_mem_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); // info set for null exceptions void comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr result, LIR_Op2* op); + void cmp_cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type); void cmove(LIR_Condition code, LIR_Opr left, LIR_Opr right, LIR_Opr result, BasicType type); void call( LIR_OpJavaCall* op, relocInfo::relocType rtype); @@ -265,6 +274,9 @@ class LIR_Assembler: public CompilationResourceObj { #ifdef TARGET_ARCH_x86 # include "c1_LIRAssembler_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_LIRAssembler_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_LIRAssembler_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp index e98834d03a5..d1a987c6991 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. 
These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "c1/c1_Defs.hpp" #include "c1/c1_Compilation.hpp" @@ -483,13 +489,11 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, CodeEmitInfo* null_check_info, CodeEmitInfo* range_check_info) { CodeStub* stub = new RangeCheckStub(range_check_info, index); if (index->is_constant()) { - cmp_mem_int(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), - index->as_jint(), null_check_info); - __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch + cmp_mem_int_branch(lir_cond_belowEqual, array, arrayOopDesc::length_offset_in_bytes(), + index->as_jint(), stub, null_check_info); // forward branch } else { - cmp_reg_mem(lir_cond_aboveEqual, index, array, - arrayOopDesc::length_offset_in_bytes(), T_INT, null_check_info); - __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch + cmp_reg_mem_branch(lir_cond_aboveEqual, index, array, arrayOopDesc::length_offset_in_bytes(), + T_INT, stub, null_check_info); // forward branch } } @@ -497,12 +501,10 @@ void LIRGenerator::array_range_check(LIR_Opr array, LIR_Opr index, void LIRGenerator::nio_range_check(LIR_Opr buffer, LIR_Opr index, LIR_Opr result, CodeEmitInfo* info) { CodeStub* stub = new RangeCheckStub(info, index, true); if (index->is_constant()) { - cmp_mem_int(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), info); - __ branch(lir_cond_belowEqual, T_INT, stub); // forward branch + cmp_mem_int_branch(lir_cond_belowEqual, buffer, java_nio_Buffer::limit_offset(), index->as_jint(), stub, info); // forward branch } else { - cmp_reg_mem(lir_cond_aboveEqual, index, buffer, - java_nio_Buffer::limit_offset(), T_INT, info); - __ branch(lir_cond_aboveEqual, T_INT, stub); // forward branch + cmp_reg_mem_branch(lir_cond_aboveEqual, index, buffer, + java_nio_Buffer::limit_offset(), T_INT, stub, info); // forward branch } __ move(index, result); } @@ -935,7 +937,7 @@ LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { return tmp; } -void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { +void LIRGenerator::profile_branch(If* if_instr, If::Condition cond, LIR_Opr left, LIR_Opr right) { if (if_instr->should_profile()) { ciMethod* method = if_instr->profiled_method(); assert(method != NULL, "method should be set if branch is profiled"); @@ -956,10 +958,17 @@ void LIRGenerator::profile_branch(If* if_instr, If::Condition cond) { __ metadata2reg(md->constant_encoding(), md_reg); LIR_Opr data_offset_reg = new_pointer_register(); - __ cmove(lir_cond(cond), - LIR_OprFact::intptrConst(taken_count_offset), - LIR_OprFact::intptrConst(not_taken_count_offset), - data_offset_reg, as_BasicType(if_instr->x()->type())); + if (left == LIR_OprFact::illegalOpr && right == LIR_OprFact::illegalOpr) { + __ cmove(lir_cond(cond), + LIR_OprFact::intptrConst(taken_count_offset), + LIR_OprFact::intptrConst(not_taken_count_offset), + data_offset_reg, as_BasicType(if_instr->x()->type())); + } else { + __ cmp_cmove(lir_cond(cond), left, right, + LIR_OprFact::intptrConst(taken_count_offset), + LIR_OprFact::intptrConst(not_taken_count_offset), + data_offset_reg, as_BasicType(if_instr->x()->type())); + } // MDO cells are intptr_t, so the data_reg width is arch-dependent. 
LIR_Opr data_reg = new_pointer_register(); @@ -1306,8 +1315,8 @@ void LIRGenerator::do_isPrimitive(Intrinsic* x) { } __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info); - __ cmp(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0)); - __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); + __ cmp_cmove(lir_cond_notEqual, temp, LIR_OprFact::metadataConst(0), + LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN); } // Example: Thread.currentThread() @@ -1500,7 +1509,6 @@ void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr p // Read the marking-in-progress flag. LIR_Opr flag_val = new_register(T_INT); __ load(mark_active_flag_addr, flag_val); - __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0)); LIR_PatchCode pre_val_patch_code = lir_patch_none; @@ -1529,7 +1537,7 @@ void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr p slow = new G1PreBarrierStub(pre_val); } - __ branch(lir_cond_notEqual, T_INT, slow); + __ cmp_branch(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0), T_INT, slow); __ branch_destination(slow->continuation()); } @@ -1587,10 +1595,8 @@ void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_Opr } assert(new_val->is_register(), "must be a register at this point"); - __ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD)); - CodeStub* slow = new G1PostBarrierStub(addr, new_val); - __ branch(lir_cond_notEqual, LP64_ONLY(T_LONG) NOT_LP64(T_INT), slow); + __ cmp_branch(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD), T_INT, slow); __ branch_destination(slow->continuation()); } @@ -1860,12 +1866,10 @@ void LIRGenerator::do_NIOCheckIndex(Intrinsic* x) { CodeEmitInfo* info = state_for(x); CodeStub* stub = new RangeCheckStub(info, index.result(), true); if (index.result()->is_constant()) { - cmp_mem_int(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), info); - __ branch(lir_cond_belowEqual, T_INT, stub); + cmp_mem_int_branch(lir_cond_belowEqual, buf.result(), java_nio_Buffer::limit_offset(), index.result()->as_jint(), stub, info); } else { - cmp_reg_mem(lir_cond_aboveEqual, index.result(), buf.result(), - java_nio_Buffer::limit_offset(), T_INT, info); - __ branch(lir_cond_aboveEqual, T_INT, stub); + cmp_reg_mem_branch(lir_cond_aboveEqual, index.result(), buf.result(), + java_nio_Buffer::limit_offset(), T_INT, stub, info); } __ move(index.result(), result); } else { @@ -1946,8 +1950,8 @@ void LIRGenerator::do_LoadIndexed(LoadIndexed* x) { } else if (use_length) { // TODO: use a (modified) version of array_range_check that does not require a // constant length to be loaded to a register - __ cmp(lir_cond_belowEqual, length.result(), index.result()); - __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result())); + CodeStub* stub = new RangeCheckStub(range_check_info, index.result()); + __ cmp_branch(lir_cond_belowEqual, length.result(), index.result(), T_INT, stub); } else { array_range_check(array.result(), index.result(), null_check_info, range_check_info); // The range check performs the null check, so clear it out for the load @@ -2129,7 +2133,7 @@ void LIRGenerator::do_UnsafeGetRaw(UnsafeGetRaw* x) { assert(index_op->type() == T_INT, "only int constants supported"); addr = new LIR_Address(base_op, index_op->as_jint(), dst_type); } else { -#if defined(X86) 
|| defined(AARCH64) +#if defined(X86) || defined(AARCH64) || defined(LOONGARCH) addr = new LIR_Address(base_op, index_op, LIR_Address::Scale(log2_scale), 0, dst_type); #elif defined(GENERATE_ADDRESS_IS_PREFERRED) addr = generate_address(base_op, index_op, log2_scale, 0, dst_type); @@ -2344,19 +2348,18 @@ void LIRGenerator::do_UnsafeGetObject(UnsafeGetObject* x) { if (off.type()->is_int()) { referent_off = LIR_OprFact::intConst(java_lang_ref_Reference::referent_offset); + __ cmp_branch(lir_cond_notEqual, off.result(), referent_off, T_INT, Lcont->label()); } else { assert(off.type()->is_long(), "what else?"); referent_off = new_register(T_LONG); __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off); + __ cmp_branch(lir_cond_notEqual, off.result(), referent_off, T_LONG, Lcont->label()); } - __ cmp(lir_cond_notEqual, off.result(), referent_off); - __ branch(lir_cond_notEqual, as_BasicType(off.type()), Lcont->label()); } if (gen_source_check) { // offset is a const and equals referent offset // if (source == null) -> continue - __ cmp(lir_cond_equal, src_reg, LIR_OprFact::oopConst(NULL)); - __ branch(lir_cond_equal, T_OBJECT, Lcont->label()); + __ cmp_branch(lir_cond_equal, src_reg, LIR_OprFact::oopConst(NULL), T_OBJECT, Lcont->label()); } LIR_Opr src_klass = new_register(T_METADATA); if (gen_type_check) { @@ -2366,8 +2369,7 @@ void LIRGenerator::do_UnsafeGetObject(UnsafeGetObject* x) { LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(InstanceKlass::reference_type_offset()), T_BYTE); LIR_Opr reference_type = new_register(T_INT); __ move(reference_type_addr, reference_type); - __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE)); - __ branch(lir_cond_equal, T_INT, Lcont->label()); + __ cmp_branch(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE), T_INT, Lcont->label()); } { // We have determined that src->_klass->_reference_type != REF_NONE @@ -2447,19 +2449,14 @@ void LIRGenerator::do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegi int high_key = one_range->high_key(); BlockBegin* dest = one_range->sux(); if (low_key == high_key) { - __ cmp(lir_cond_equal, value, low_key); - __ branch(lir_cond_equal, T_INT, dest); + __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); } else if (high_key - low_key == 1) { - __ cmp(lir_cond_equal, value, low_key); - __ branch(lir_cond_equal, T_INT, dest); - __ cmp(lir_cond_equal, value, high_key); - __ branch(lir_cond_equal, T_INT, dest); + __ cmp_branch(lir_cond_equal, value, low_key, T_INT, dest); + __ cmp_branch(lir_cond_equal, value, high_key, T_INT, dest); } else { LabelObj* L = new LabelObj(); - __ cmp(lir_cond_less, value, low_key); - __ branch(lir_cond_less, T_INT, L->label()); - __ cmp(lir_cond_lessEqual, value, high_key); - __ branch(lir_cond_lessEqual, T_INT, dest); + __ cmp_branch(lir_cond_less, value, low_key, T_INT, L->label()); + __ cmp_branch(lir_cond_lessEqual, value, high_key, T_INT, dest); __ branch_destination(L->label()); } } @@ -2546,8 +2543,7 @@ void LIRGenerator::do_TableSwitch(TableSwitch* x) { do_SwitchRanges(create_lookup_ranges(x), value, x->default_sux()); } else { for (int i = 0; i < len; i++) { - __ cmp(lir_cond_equal, value, i + lo_key); - __ branch(lir_cond_equal, T_INT, x->sux_at(i)); + __ cmp_branch(lir_cond_equal, value, i + lo_key, T_INT, x->sux_at(i)); } __ jump(x->default_sux()); } @@ -2572,8 +2568,7 @@ void LIRGenerator::do_LookupSwitch(LookupSwitch* x) { } else { int len = x->length(); for (int i = 0; i < len; i++) { 
- __ cmp(lir_cond_equal, value, x->key_at(i)); - __ branch(lir_cond_equal, T_INT, x->sux_at(i)); + __ cmp_branch(lir_cond_equal, value, x->key_at(i), T_INT, x->sux_at(i)); } __ jump(x->default_sux()); } @@ -2625,7 +2620,6 @@ void LIRGenerator::do_Goto(Goto* x) { } LIR_Opr md_reg = new_register(T_METADATA); __ metadata2reg(md->constant_encoding(), md_reg); - increment_counter(new LIR_Address(md_reg, offset, NOT_LP64(T_INT) LP64_ONLY(T_LONG)), DataLayout::counter_increment); } @@ -3079,8 +3073,8 @@ void LIRGenerator::do_IfOp(IfOp* x) { f_val.dont_load_item(); LIR_Opr reg = rlock_result(x); - __ cmp(lir_cond(x->cond()), left.result(), right.result()); - __ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); + __ cmp_cmove(lir_cond(x->cond()), left.result(), right.result(), + t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type())); } #ifdef JFR_HAVE_INTRINSICS @@ -3120,8 +3114,7 @@ void LIRGenerator::do_getEventWriter(Intrinsic* x) { T_OBJECT); LIR_Opr result = rlock_result(x); __ move_wide(jobj_addr, result); - __ cmp(lir_cond_equal, result, LIR_OprFact::oopConst(NULL)); - __ branch(lir_cond_equal, T_OBJECT, L_end->label()); + __ cmp_branch(lir_cond_equal, result, LIR_OprFact::oopConst(0), T_OBJECT, L_end->label()); __ move_wide(new LIR_Address(result, T_OBJECT), result); __ branch_destination(L_end->label()); @@ -3485,10 +3478,9 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info, LIR_Opr meth = new_register(T_METADATA); __ metadata2reg(method->constant_encoding(), meth); __ logical_and(result, mask, result); - __ cmp(lir_cond_equal, result, LIR_OprFact::intConst(0)); // The bci for info can point to cmp for if's we want the if bci CodeStub* overflow = new CounterOverflowStub(info, bci, meth); - __ branch(lir_cond_equal, T_INT, overflow); + __ cmp_branch(lir_cond_equal, result, LIR_OprFact::intConst(0), T_INT, overflow); __ branch_destination(overflow->continuation()); } } @@ -3600,8 +3592,7 @@ void LIRGenerator::do_RangeCheckPredicate(RangeCheckPredicate *x) { CodeEmitInfo *info = state_for(x, x->state()); CodeStub* stub = new PredicateFailedStub(info); - __ cmp(lir_cond(cond), left, right); - __ branch(lir_cond(cond), right->type(), stub); + __ cmp_branch(lir_cond(cond), left, right, right->type(), stub); } } @@ -3749,8 +3740,7 @@ LIR_Opr LIRGenerator::maybe_mask_boolean(StoreIndexed* x, LIR_Opr array, LIR_Opr __ move(new LIR_Address(klass, in_bytes(Klass::layout_helper_offset()), T_INT), layout); int diffbit = Klass::layout_helper_boolean_diffbit(); __ logical_and(layout, LIR_OprFact::intConst(diffbit), layout); - __ cmp(lir_cond_notEqual, layout, LIR_OprFact::intConst(0)); - __ cmove(lir_cond_notEqual, value_fixed, value, value_fixed, T_BYTE); + __ cmp_cmove(lir_cond_notEqual, layout, LIR_OprFact::intConst(0), value_fixed, value, value_fixed, T_BYTE); value = value_fixed; } return value; diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp index 27be79fee14..57c253db690 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2015. These + * modifications are Copyright (c) 2015 Loongson Technology, and are made + * available on the same license terms set forth above. 
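
The LIRGenerator hunks above fold the two-node cmp/branch idiom into a single cmp_branch node. The likely reasoning, stated here as a hedge since the patch itself does not spell it out: LoongArch, like MIPS, has no integer condition-flags register, so a conditional branch (beq/bne/blt) consumes both source registers directly and the IR node that emits it must still carry both compare operands. A minimal stand-alone sketch of the fused shape, with purely illustrative names that are not HotSpot's:

    #include <cstdio>

    // Illustrative only, not HotSpot's LIR classes. The point is that the
    // condition, both operands and the target live on one node, which maps
    // 1:1 onto a flag-less "beq/bne/blt rj, rd, label" instruction.
    enum Cond { EQ, NE, LT };
    struct CmpBranch { Cond cond; int lhs_reg, rhs_reg; const char* target; };

    static void emit(const CmpBranch& op) {
      static const char* mnemonic[] = { "beq", "bne", "blt" };
      std::printf("  %s $r%d, $r%d, %s\n",
                  mnemonic[op.cond], op.lhs_reg, op.rhs_reg, op.target);
    }

    int main() {
      emit(CmpBranch{NE, 4, 5, "Lcont"});   // "if (r4 != r5) goto Lcont"
      return 0;
    }
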
+ */ + #ifndef SHARE_VM_C1_C1_LIRGENERATOR_HPP #define SHARE_VM_C1_C1_LIRGENERATOR_HPP @@ -246,6 +252,9 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void do_getClass(Intrinsic* x); void do_currentThread(Intrinsic* x); void do_MathIntrinsic(Intrinsic* x); +#if defined(LOONGARCH64) + void do_LibmIntrinsic(Intrinsic* x); +#endif void do_ArrayCopy(Intrinsic* x); void do_CompareAndSwap(Intrinsic* x, ValueType* type); void do_NIOCheckIndex(Intrinsic* x); @@ -335,8 +344,10 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { void new_instance (LIR_Opr dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr scratch1, LIR_Opr scratch2, LIR_Opr scratch3, LIR_Opr scratch4, LIR_Opr klass_reg, CodeEmitInfo* info); // machine dependent - void cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info); - void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info); + template + void cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info); + template + void cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info); void cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, LIR_Opr disp, BasicType type, CodeEmitInfo* info); void arraycopy_helper(Intrinsic* x, int* flags, ciArrayKlass** expected_type); @@ -364,7 +375,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { LIR_Opr safepoint_poll_register(); - void profile_branch(If* if_instr, If::Condition cond); + void profile_branch(If* if_instr, If::Condition cond, LIR_Opr left = LIR_OprFact::illegalOpr, LIR_Opr right = LIR_OprFact::illegalOpr); void increment_event_counter_impl(CodeEmitInfo* info, ciMethod *method, int frequency, int bci, bool backedge, bool notify); diff --git a/hotspot/src/share/vm/c1/c1_LinearScan.cpp b/hotspot/src/share/vm/c1/c1_LinearScan.cpp index 1f6281bf250..4549ff09282 100644 --- a/hotspot/src/share/vm/c1/c1_LinearScan.cpp +++ b/hotspot/src/share/vm/c1/c1_LinearScan.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
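
The do_IfOp and maybe_mask_boolean hunks above fuse cmp followed by cmove into a single four-operand cmp_cmove, and the c1_LinearScan.cpp changes that follow treat it as a LIR_Op4. On a flag-less target such a select is typically lowered with mask-and-merge instructions (LoongArch provides maskeqz/masknez for this). The stand-alone, branch-free sketch below imitates that lowering; the function name and types are illustrative:

    #include <cstdint>
    #include <cstdio>

    // Branch-free "compare and conditionally move": result = (a == b) ? t : f,
    // written the way a flag-less backend might lower it (mask and merge).
    static int64_t cmp_cmove_eq(int64_t a, int64_t b, int64_t t_val, int64_t f_val) {
      const int64_t mask = -static_cast<int64_t>(a == b);  // all ones or all zeros
      return (t_val & mask) | (f_val & ~mask);
    }

    int main() {
      std::printf("%lld\n", (long long)cmp_cmove_eq(3, 3, 10, 20));  // prints 10
      std::printf("%lld\n", (long long)cmp_cmove_eq(3, 4, 10, 20));  // prints 20
      return 0;
    }
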
+ */ + #include "precompiled.hpp" #include "c1/c1_CFGPrinter.hpp" #include "c1/c1_CodeStubs.hpp" @@ -35,6 +41,9 @@ #ifdef TARGET_ARCH_x86 # include "vmreg_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "vmreg_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "vmreg_aarch64.inline.hpp" #endif @@ -1256,6 +1265,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { LIR_Opr move_from = cmove->in_opr1(); LIR_Opr move_to = cmove->result_opr(); + if (move_to->is_register() && move_from->is_register()) { + Interval* from = interval_at(reg_num(move_from)); + Interval* to = interval_at(reg_num(move_to)); + if (from != NULL && to != NULL) { + to->set_register_hint(from); + TRACE_LINEAR_SCAN(4, tty->print_cr("operation at op_id %d: added hint from interval %d to %d", cmove->id(), from->reg_num(), to->reg_num())); + } + } + break; + } + case lir_cmp_cmove: { + assert(op->as_Op4() != NULL, "lir_cmp_cmove must be LIR_Op4"); + LIR_Op4* cmove = (LIR_Op4*)op; + + LIR_Opr move_from = cmove->in_opr3(); + LIR_Opr move_to = cmove->result_opr(); + if (move_to->is_register() && move_from->is_register()) { Interval* from = interval_at(reg_num(move_from)); Interval* to = interval_at(reg_num(move_to)); @@ -2104,7 +2130,7 @@ LIR_Opr LinearScan::calc_operand_for_interval(const Interval* interval) { #ifdef _LP64 return LIR_OprFact::double_cpu(assigned_reg, assigned_reg); #else -#if defined(SPARC) || defined(PPC) +#if defined(SPARC) || defined(PPC) || defined(LOONGARCH) return LIR_OprFact::double_cpu(assigned_regHi, assigned_reg); #else return LIR_OprFact::double_cpu(assigned_reg, assigned_regHi); @@ -3285,7 +3311,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { check_live = (move->patch_code() == lir_patch_none); } LIR_OpBranch* branch = op->as_OpBranch(); - if (branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) { + LIR_OpCmpBranch* cmp_branch = op->as_OpCmpBranch(); + if ((branch != NULL && branch->stub() != NULL && branch->stub()->is_exception_throw_stub()) || + (cmp_branch != NULL && cmp_branch->stub() != NULL && cmp_branch->stub()->is_exception_throw_stub())) { // Don't bother checking the stub in this case since the // exception stub will never return to normal control flow. 
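
add_register_hints above now also looks through lir_cmp_cmove (the new LIR_Op4) and hints the allocator that the result interval should prefer the register already holding one of the select inputs, so linear scan can often coalesce the move away. A toy model of what such a hint buys; none of these types mirror HotSpot's real data structures:

    #include <array>
    #include <cstdio>

    // Toy linear-scan flavor: an interval may hint at another interval, and the
    // allocator prefers the hinted register when it is still free.
    struct Interval { int assigned = -1; const Interval* hint = nullptr; };

    static int pick_register(const Interval& it, const std::array<bool, 4>& is_free) {
      if (it.hint && it.hint->assigned >= 0 && is_free[it.hint->assigned])
        return it.hint->assigned;                      // coalesce with the hinted value
      for (int r = 0; r < (int)is_free.size(); r++)    // otherwise first free register
        if (is_free[r]) return r;
      return -1;                                       // would spill
    }

    int main() {
      Interval input;  input.assigned = 2;             // cmove input already lives in r2
      Interval result; result.hint = &input;           // hint recorded for the cmove result
      std::array<bool, 4> is_free = { true, true, true, true };
      std::printf("result gets r%d, so the copy disappears\n",
                  pick_register(result, is_free));
      return 0;
    }
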
check_live = false; @@ -6142,6 +6170,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); LIR_OpBranch* branch = (LIR_OpBranch*)op; + if (branch->block() == target_from) { + branch->change_block(target_to); + } + if (branch->ublock() == target_from) { + branch->change_ublock(target_to); + } + } else if (op->code() == lir_cmp_branch || op->code() == lir_cmp_float_branch) { + assert(op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); + LIR_OpCmpBranch* branch = (LIR_OpCmpBranch*)op; + if (branch->block() == target_from) { branch->change_block(target_to); } @@ -6252,6 +6290,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { instructions->truncate(instructions->length() - 1); } } + } else if (prev_op->code() == lir_cmp_branch || prev_op->code() == lir_cmp_float_branch) { + assert(prev_op->as_OpCmpBranch() != NULL, "branch must be of type LIR_OpCmpBranch"); + LIR_OpCmpBranch* prev_branch = (LIR_OpCmpBranch*)prev_op; + + if (prev_branch->stub() == NULL) { + if (prev_branch->block() == code->at(i + 1) && prev_branch->info() == NULL) { + TRACE_LINEAR_SCAN(3, tty->print_cr("Negating conditional branch and deleting unconditional branch at end of block B%d", block->block_id())); + + // eliminate a conditional branch to the immediate successor + prev_branch->change_block(last_branch->block()); + prev_branch->negate_cond(); + instructions->trunc_to(instructions->length() - 1); + } + } } } } @@ -6328,6 +6380,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { assert(op_branch->block() == NULL || code->index_of(op_branch->block()) != -1, "branch target not valid"); assert(op_branch->ublock() == NULL || code->index_of(op_branch->ublock()) != -1, "branch target not valid"); } + + LIR_OpCmpBranch* op_cmp_branch = instructions->at(j)->as_OpCmpBranch(); + + if (op_cmp_branch != NULL) { + assert(op_cmp_branch->block() == NULL || code->find(op_cmp_branch->block()) != -1, "branch target not valid"); + assert(op_cmp_branch->ublock() == NULL || code->find(op_cmp_branch->ublock()) != -1, "branch target not valid"); + } } for (j = 0; j < block->number_of_sux() - 1; j++) { @@ -6571,6 +6630,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { break; } + case lir_cmp_branch: + case lir_cmp_float_branch: { + LIR_OpCmpBranch* branch = op->as_OpCmpBranch(); + if (branch->block() == NULL) { + inc_counter(counter_stub_branch); + } else { + inc_counter(counter_cond_branch); + } + inc_counter(counter_cmp); + break; + } + + case lir_cmp_cmove: { + inc_counter(counter_misc_inst); + inc_counter(counter_cmp); + break; + } + case lir_neg: case lir_add: case lir_sub: diff --git a/hotspot/src/share/vm/c1/c1_LinearScan.hpp b/hotspot/src/share/vm/c1/c1_LinearScan.hpp index 96e6b3babff..576a07d73dc 100644 --- a/hotspot/src/share/vm/c1/c1_LinearScan.hpp +++ b/hotspot/src/share/vm/c1/c1_LinearScan.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
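
delete_unnecessary_jumps and substitute_branch_target above gain matching cases for LIR_OpCmpBranch: when a block ends with a conditional branch whose target is the fall-through block, followed by an unconditional jump, the condition is negated and retargeted so the trailing jump can be dropped. The transformation in miniature, on a hypothetical textual form:

    #include <cstdio>
    #include <cstring>

    // Before:  cmp_branch cond, L_next    (L_next is the fall-through block)
    //          jump       L_far
    // After:   cmp_branch !cond, L_far    (the unconditional jump is deleted)
    struct CondBranch { const char* cond; const char* target; };

    static const char* negate(const char* cond) {
      return std::strcmp(cond, "==") == 0 ? "!=" : "==";   // toy: only == and !=
    }

    int main() {
      CondBranch branch        = { "==", "L_next" };
      const char* fall_through = "L_next";
      const char* jump_target  = "L_far";
      if (std::strcmp(branch.target, fall_through) == 0) {
        branch.cond   = negate(branch.cond);
        branch.target = jump_target;
        std::printf("cmp_branch %s, %s   // trailing jump removed\n",
                    branch.cond, branch.target);
      }
      return 0;
    }
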
+ */ + #ifndef SHARE_VM_C1_C1_LINEARSCAN_HPP #define SHARE_VM_C1_C1_LINEARSCAN_HPP @@ -976,6 +982,9 @@ class LinearScanTimers : public StackObj { #ifdef TARGET_ARCH_x86 # include "c1_LinearScan_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_LinearScan_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_LinearScan_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp b/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp index 7e22bbaa270..12aca7bf50c 100644 --- a/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp +++ b/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_C1_C1_MACROASSEMBLER_HPP #define SHARE_VM_C1_C1_MACROASSEMBLER_HPP @@ -50,6 +56,9 @@ class C1_MacroAssembler: public MacroAssembler { #ifdef TARGET_ARCH_x86 # include "c1_MacroAssembler_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_MacroAssembler_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_MacroAssembler_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/c1/c1_Runtime1.cpp b/hotspot/src/share/vm/c1/c1_Runtime1.cpp index aebc377527c..f1253506f67 100644 --- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp +++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "asm/codeBuffer.hpp" #include "c1/c1_CodeStubs.hpp" @@ -710,6 +716,7 @@ JRT_ENTRY(void, Runtime1::deoptimize(JavaThread* thread)) // Return to the now deoptimized frame. JRT_END +#ifndef LOONGARCH static Klass* resolve_field_return_klass(methodHandle caller, int bci, TRAPS) { Bytecode_field field_access(caller, bci); @@ -1186,6 +1193,47 @@ JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_i } JRT_END +#else + +JRT_ENTRY(void, Runtime1::patch_code(JavaThread* thread, Runtime1::StubID stub_id )) +{ + RegisterMap reg_map(thread, false); + + NOT_PRODUCT(_patch_code_slowcase_cnt++;) + // According to the LoongArch, "Concurrent modification and + // execution of instructions can lead to the resulting instruction + // performing any behavior that can be achieved by executing any + // sequence of instructions that can be executed from the same + // Exception level, except where the instruction before + // modification and the instruction after modification is a B, BL, + // NOP, BRK instruction." + // + // This effectively makes the games we play when patching + // impossible, so when we come across an access that needs + // patching we must deoptimize. + + if (TracePatching) { + tty->print_cr("Deoptimizing because patch is needed"); + } + + frame runtime_frame = thread->last_frame(); + frame caller_frame = runtime_frame.sender(®_map); + + // It's possible the nmethod was invalidated in the last + // safepoint, but if it's still alive then make it not_entrant. + nmethod* nm = CodeCache::find_nmethod(caller_frame.pc()); + if (nm != NULL) { + nm->make_not_entrant(); + } + + Deoptimization::deoptimize_frame(thread, caller_frame.id()); + + // Return to the now deoptimized frame. +} +JRT_END + +#endif + // // Entry point for compiled code. We want to patch a nmethod. 
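
The LoongArch variant of Runtime1::patch_code above never patches in place: it finds the caller nmethod, marks it not_entrant and deoptimizes the caller frame. The quoted justification appears carried over from the AArch64 port's wording, but the underlying concern applies here as well: a constant or address is materialized by a multi-instruction sequence (for example lu12i.w / ori / lu32i.d / lu52i.d for a 64-bit immediate), so a thread executing the sequence while another thread rewrites it can observe a mix of old and new words. The toy below only simulates that interleaving in one thread, to show the torn value deoptimization avoids:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t code[2] = { 0x1111, 0x2222 };        // "old" immediate, split in two
      // patcher writes the new value one word at a time ...
      code[0] = 0xAAAA;
      // ... and an executing thread happens to read here, between the two stores:
      uint64_t seen = (uint64_t(code[0]) << 32) | code[1];
      code[1] = 0xBBBB;
      std::printf("executed with immediate 0x%llx (neither old nor new)\n",
                  (unsigned long long)seen);
      return 0;
    }

Deoptimizing to the interpreter sidesteps the race entirely, at the cost of re-resolving the field or class on the slow path.
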
// We don't do a normal VM transition here because we want to diff --git a/hotspot/src/share/vm/c1/c1_globals.hpp b/hotspot/src/share/vm/c1/c1_globals.hpp index 8f7f9f61c90..0e2d926bdfb 100644 --- a/hotspot/src/share/vm/c1/c1_globals.hpp +++ b/hotspot/src/share/vm/c1/c1_globals.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2015, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_C1_C1_GLOBALS_HPP #define SHARE_VM_C1_C1_GLOBALS_HPP @@ -29,6 +35,9 @@ #ifdef TARGET_ARCH_x86 # include "c1_globals_x86.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "c1_globals_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "c1_globals_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp b/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp index f067419ffcd..5aa19dc84f0 100644 --- a/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp +++ b/hotspot/src/share/vm/classfile/bytecodeAssembler.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/bytecodeAssembler.hpp" @@ -32,6 +38,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_sparc # include "bytes_sparc.hpp" #endif diff --git a/hotspot/src/share/vm/classfile/classFileStream.hpp b/hotspot/src/share/vm/classfile/classFileStream.hpp index 9632c8c8c24..fad25c44fc3 100644 --- a/hotspot/src/share/vm/classfile/classFileStream.hpp +++ b/hotspot/src/share/vm/classfile/classFileStream.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_CLASSFILE_CLASSFILESTREAM_HPP #define SHARE_VM_CLASSFILE_CLASSFILESTREAM_HPP @@ -29,6 +35,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/classfile/stackMapTable.hpp b/hotspot/src/share/vm/classfile/stackMapTable.hpp index a36a7ba3cfd..d7c1f086442 100644 --- a/hotspot/src/share/vm/classfile/stackMapTable.hpp +++ b/hotspot/src/share/vm/classfile/stackMapTable.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP #define SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP @@ -34,6 +40,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/classfile/verifier.cpp b/hotspot/src/share/vm/classfile/verifier.cpp index 2dddd1fdedc..4a20d15f311 100644 --- a/hotspot/src/share/vm/classfile/verifier.cpp +++ b/hotspot/src/share/vm/classfile/verifier.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/classFileStream.hpp" #include "classfile/javaClasses.hpp" @@ -48,6 +54,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/code/codeBlob.cpp b/hotspot/src/share/vm/code/codeBlob.cpp index aff2aaf0ca7..9ba76007cd0 100644 --- a/hotspot/src/share/vm/code/codeBlob.cpp +++ b/hotspot/src/share/vm/code/codeBlob.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "code/codeBlob.hpp" #include "code/codeCache.hpp" @@ -57,6 +63,12 @@ #ifdef TARGET_ARCH_ppc # include "nativeInst_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +#endif #ifdef COMPILER1 #include "c1/c1_Runtime1.hpp" #endif diff --git a/hotspot/src/share/vm/code/compiledIC.hpp b/hotspot/src/share/vm/code/compiledIC.hpp index f910f11886e..e282a3f3afe 100644 --- a/hotspot/src/share/vm/code/compiledIC.hpp +++ b/hotspot/src/share/vm/code/compiledIC.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_CODE_COMPILEDIC_HPP #define SHARE_VM_CODE_COMPILEDIC_HPP @@ -45,6 +51,12 @@ #ifdef TARGET_ARCH_ppc # include "nativeInst_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +#endif //----------------------------------------------------------------------------- // The CompiledIC represents a compiled inline cache. diff --git a/hotspot/src/share/vm/code/relocInfo.hpp b/hotspot/src/share/vm/code/relocInfo.hpp index ad55a2fd93a..813504821d3 100644 --- a/hotspot/src/share/vm/code/relocInfo.hpp +++ b/hotspot/src/share/vm/code/relocInfo.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_CODE_RELOCINFO_HPP #define SHARE_VM_CODE_RELOCINFO_HPP @@ -261,7 +267,11 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { poll_return_type = 11, // polling instruction for safepoints at return metadata_type = 12, // metadata that used to be oops trampoline_stub_type = 13, // stub-entry for trampoline +#if !defined MIPS64 yet_unused_type_1 = 14, // Still unused +#else + internal_pc_type = 14, // tag for internal data,?? +#endif data_prefix_tag = 15, // tag for a prefix (carries data arguments) type_mask = 15 // A mask which selects only the above values }; @@ -288,6 +298,7 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { ; #endif +#if defined MIPS64 && !defined ZERO #define APPLY_TO_RELOCATIONS(visitor) \ visitor(oop) \ visitor(metadata) \ @@ -300,9 +311,26 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { visitor(internal_word) \ visitor(poll) \ visitor(poll_return) \ - visitor(section_word) \ visitor(trampoline_stub) \ + visitor(internal_pc) \ +#else + #define APPLY_TO_RELOCATIONS(visitor) \ + visitor(oop) \ + visitor(metadata) \ + visitor(virtual_call) \ + visitor(opt_virtual_call) \ + visitor(static_call) \ + visitor(static_stub) \ + visitor(runtime_call) \ + visitor(external_word) \ + visitor(internal_word) \ + visitor(poll) \ + visitor(poll_return) \ + visitor(trampoline_stub) \ + visitor(section_word) \ + +#endif public: enum { @@ -432,6 +460,12 @@ class relocInfo VALUE_OBJ_CLASS_SPEC { #endif #ifdef TARGET_ARCH_ppc # include "relocInfo_ppc.hpp" +#endif +#ifdef TARGET_ARCH_mips +# include "relocInfo_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "relocInfo_loongarch.hpp" #endif @@ -1024,6 +1058,15 @@ class metadata_Relocation : public DataRelocation { // Note: metadata_value transparently converts Universe::non_metadata_word to NULL. }; +#if defined MIPS64 +// to handle the set_last_java_frame pc +class internal_pc_Relocation : public Relocation { + relocInfo::relocType type() { return relocInfo::internal_pc_type; } + public: + address pc() { return pd_get_address_from_code(); } + void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest); +}; +#endif class virtual_call_Relocation : public CallRelocation { relocInfo::relocType type() { return relocInfo::virtual_call_type; } diff --git a/hotspot/src/share/vm/code/vmreg.hpp b/hotspot/src/share/vm/code/vmreg.hpp index 07b595b60a0..5bc7131a8a0 100644 --- a/hotspot/src/share/vm/code/vmreg.hpp +++ b/hotspot/src/share/vm/code/vmreg.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
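
APPLY_TO_RELOCATIONS is an X-macro: the relocation kinds are listed once, and each expansion site decides what to generate from them, which is why adding internal_pc for the MIPS64 build only needs a new visitor(...) line in the list. The pattern in isolation, with illustrative names:

    #include <cstdio>

    // X-macro: write the list once, let each expansion decide what to generate.
    #define APPLY_TO_KINDS(visitor) \
      visitor(oop)                  \
      visitor(metadata)             \
      visitor(internal_pc)

    #define DECLARE_ENUM(name)  name##_type,
    #define DECLARE_NAME(name)  #name,

    enum Kind { APPLY_TO_KINDS(DECLARE_ENUM) kind_count };
    static const char* kind_names[] = { APPLY_TO_KINDS(DECLARE_NAME) };

    int main() {
      for (int i = 0; i < kind_count; i++)
        std::printf("%d -> %s\n", i, kind_names[i]);
      return 0;
    }
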
+ */ + #ifndef SHARE_VM_CODE_VMREG_HPP #define SHARE_VM_CODE_VMREG_HPP @@ -47,6 +53,12 @@ #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/adGlobals_ppc_64.hpp" #endif +#ifdef TARGET_ARCH_MODEL_mips_64 +# include "adfiles/adGlobals_mips_64.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/adGlobals_loongarch_64.hpp" +#endif #endif //------------------------------VMReg------------------------------------------ @@ -158,6 +170,12 @@ friend class OptoReg; #ifdef TARGET_ARCH_x86 # include "vmreg_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vmreg_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vmreg_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "vmreg_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/compiler/disassembler.cpp b/hotspot/src/share/vm/compiler/disassembler.cpp index dfdd5f77e79..2dd0ff69ac4 100644 --- a/hotspot/src/share/vm/compiler/disassembler.cpp +++ b/hotspot/src/share/vm/compiler/disassembler.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/javaClasses.hpp" #include "code/codeCache.hpp" @@ -50,6 +56,12 @@ #ifdef TARGET_ARCH_ppc # include "depChecker_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "depChecker_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "depChecker_loongarch.hpp" +#endif #ifdef SHARK #include "shark/sharkEntry.hpp" #endif diff --git a/hotspot/src/share/vm/compiler/disassembler.hpp b/hotspot/src/share/vm/compiler/disassembler.hpp index 168851cc264..8b632748f23 100644 --- a/hotspot/src/share/vm/compiler/disassembler.hpp +++ b/hotspot/src/share/vm/compiler/disassembler.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_COMPILER_DISASSEMBLER_HPP #define SHARE_VM_COMPILER_DISASSEMBLER_HPP @@ -95,6 +101,12 @@ class Disassembler { #endif #ifdef TARGET_ARCH_ppc # include "disassembler_ppc.hpp" +#endif +#ifdef TARGET_ARCH_mips +# include "disassembler_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "disassembler_loongarch.hpp" #endif diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp index 733b5c91ad9..678a1ee8367 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp @@ -86,6 +86,9 @@ class CardTableExtension : public CardTableModRefBS { void inline_write_ref_field_gc(void* field, oop new_val) { jbyte* byte = byte_for(field); *byte = youngergen_card; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } // Adaptive size policy support diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp index 1dde10746d2..8b800b31c55 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parMarkBitMap.cpp @@ -105,6 +105,9 @@ ParMarkBitMap::mark_obj(HeapWord* addr, size_t size) assert(end_bit_ok, "concurrency problem"); DEBUG_ONLY(Atomic::inc_ptr(&mark_bitmap_count)); DEBUG_ONLY(Atomic::add_ptr(size, &mark_bitmap_size)); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif return true; } return false; diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp index 6cf76353d9c..4d34bc209bd 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.inline.hpp @@ -33,6 +33,9 @@ void ParCompactionManager::push_objarray(oop obj, size_t index) ObjArrayTask task(obj, index); assert(task.is_valid(), "bad ObjArrayTask"); _objarray_stack.push(task); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } void ParCompactionManager::push_region(size_t index) @@ -44,6 +47,9 @@ void ParCompactionManager::push_region(size_t index) assert(region_ptr->_pushed++ == 0, "should only be pushed once"); #endif region_stack()->push(index); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } #endif // SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSCOMPACTIONMANAGER_INLINE_HPP diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp index 0fa980ef83c..2f66493e0a8 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp @@ -499,6 +499,9 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len) if (beg_region == end_region) { // All in one region. 
_region_data[beg_region].add_live_obj(len); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif return; } @@ -517,6 +520,9 @@ void ParallelCompactData::add_obj(HeapWord* addr, size_t len) const size_t end_ofs = region_offset(addr + len - 1); _region_data[end_region].set_partial_obj_size(end_ofs + 1); _region_data[end_region].set_partial_obj_addr(addr); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } void @@ -3229,6 +3235,9 @@ void PSParallelCompact::fill_blocks(size_t region_idx) if (new_block != cur_block) { cur_block = new_block; sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits)); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end); diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp index 881f380ceab..461b83930ff 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp @@ -1329,6 +1329,9 @@ inline bool PSParallelCompact::mark_obj(oop obj) { const int obj_size = obj->size(); if (mark_bitmap()->mark_obj(obj, obj_size)) { _summary_data.add_obj(obj, obj_size); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif return true; } else { return false; @@ -1363,6 +1366,9 @@ inline void PSParallelCompact::mark_and_push(ParCompactionManager* cm, T* p) { oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); if (mark_bitmap()->is_unmarked(obj) && mark_obj(obj)) { cm->push(obj); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } } } diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp index a33132009c3..291019660a8 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp @@ -41,8 +41,9 @@ template inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { if (p != NULL) { // XXX: error if p != NULL here oop o = oopDesc::load_decode_heap_oop_not_null(p); - if (o->is_forwarded()) { - o = o->forwardee(); + markOop m = o->mark(); + if (m->is_marked()) { + o = (oop) m->decode_pointer(); // Card mark if (PSScavenge::is_obj_in_young(o)) { PSScavenge::card_table()->inline_write_ref_field_gc(p, o); @@ -102,11 +103,19 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { oop new_obj = NULL; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + // NOTE! We must be very careful with any methods that access the mark // in o. There may be multiple threads racing on it, and it may be forwarded // at any time. Do not use oop methods for accessing the mark! 
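
The recurring "if (UseSyncLevel >= 2000) OrderAccess::fence();" insertions above and below follow card-table, mark-bitmap, task-queue and forwarding updates on the GC side: MIPS and LoongArch are weakly ordered, and the port makes the extra full fences tunable through the UseSyncLevel flag rather than unconditional. The shape of the pattern in portable form, with std::atomic_thread_fence standing in for OrderAccess::fence (UseSyncLevel and its threshold come from the patch, everything else is illustrative):

    #include <atomic>
    #include <cstddef>

    static int UseSyncLevel = 2000;          // tunable, as in the patch
    static unsigned char card_table[1024];

    static void mark_card_dirty(std::size_t index) {
      card_table[index] = 0;                 // plain store of the dirty value
      if (UseSyncLevel >= 2000)
        std::atomic_thread_fence(std::memory_order_seq_cst);  // publish on weak memory
    }

    int main() {
      mark_card_dirty(42);
      return 0;
    }
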
markOop test_mark = o->mark(); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + // The same test as "o->is_forwarded()" if (!test_mark->is_marked()) { bool new_obj_is_tenured = false; @@ -141,6 +150,10 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { } } } + +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } } @@ -200,6 +213,9 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { // Copy obj Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif // Now we have to CAS in the header. if (o->cas_forward_to(new_obj, test_mark)) { @@ -247,6 +263,10 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { // don't update this before the unallocation! new_obj = o->forwardee(); } + +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } else { assert(o->is_forwarded(), "Sanity"); new_obj = o->forwardee(); diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp index 1a722a7ca72..4980be3946c 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psScavenge.inline.hpp @@ -71,14 +71,22 @@ inline void PSScavenge::copy_and_push_safe_barrier(PSPromotionManager* pm, assert(should_scavenge(p, true), "revisiting object?"); oop o = oopDesc::load_decode_heap_oop_not_null(p); - oop new_obj = o->is_forwarded() - ? o->forwardee() - : pm->copy_to_survivor_space(o); +#if defined MIPS || defined LOONGARCH + if (oopDesc::is_null(o)) return; +#endif + + oop new_obj; + markOop m = o->mark(); + if (m->is_marked()) { + new_obj = (oop) m->decode_pointer(); + } else { + new_obj = pm->copy_to_survivor_space(o); + } #ifndef PRODUCT // This code must come after the CAS test, or it will print incorrect // information. - if (TraceScavenge && o->is_forwarded()) { + if (TraceScavenge && m->is_marked()) { gclog_or_tty->print_cr("{%s %s " PTR_FORMAT " -> " PTR_FORMAT " (%d)}", "forwarding", new_obj->klass()->internal_name(), p2i((void *)o), p2i((void *)new_obj), new_obj->size()); @@ -138,8 +146,9 @@ class PSScavengeFromKlassClosure: public OopClosure { oop o = *p; oop new_obj; - if (o->is_forwarded()) { - new_obj = o->forwardee(); + markOop m = o->mark(); + if (m->is_marked()) { + new_obj = (oop) m->decode_pointer(); } else { new_obj = _pm->copy_to_survivor_space(o); } diff --git a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp index e14c50bf01c..8b3860070c1 100644 --- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp +++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
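
copy_and_push_safe_barrier and PSScavengeFromKlassClosure above now load the mark word once and decode the forwardee from that single value, instead of calling is_forwarded() and then forwardee() as two separate loads; on a weakly ordered machine the object can be forwarded between those two reads. A stand-alone illustration of the load-once discipline, with std::atomic and a low tag bit standing in for the real mark-word encoding:

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    // Toy "mark word": low bit = forwarded, remaining bits = forwarding pointer.
    static std::atomic<uintptr_t> mark{0};

    static uintptr_t forwardee_or_zero() {
      uintptr_t m = mark.load(std::memory_order_acquire);  // single load ...
      if (m & 1)                                            // ... tested ...
        return m & ~uintptr_t(1);                           // ... and decoded from m
      return 0;   // not forwarded at the time of the load
    }

    int main() {
      mark.store(uintptr_t(0x1000) | 1, std::memory_order_release);  // forward to 0x1000
      std::printf("forwardee = %#lx\n", (unsigned long)forwardee_or_zero());
      return 0;
    }
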
+ */ + #ifndef SHARE_VM_INTERPRETER_ABSTRACTINTERPRETER_HPP #define SHARE_VM_INTERPRETER_ABSTRACTINTERPRETER_HPP @@ -42,6 +48,10 @@ # include "interp_masm_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "interp_masm_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "interp_masm_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "interp_masm_loongarch_64.hpp" #endif // This file contains the platform-independent parts diff --git a/hotspot/src/share/vm/interpreter/bytecode.hpp b/hotspot/src/share/vm/interpreter/bytecode.hpp index 7e55fd009a2..a06dcd58bc3 100644 --- a/hotspot/src/share/vm/interpreter/bytecode.hpp +++ b/hotspot/src/share/vm/interpreter/bytecode.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_BYTECODE_HPP #define SHARE_VM_INTERPRETER_BYTECODE_HPP @@ -31,6 +37,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp index 28843715c75..c17fe8d7e05 100644 --- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp +++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_HPP #define SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_HPP @@ -35,6 +41,9 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif @@ -592,6 +601,12 @@ void print(); #ifdef TARGET_ARCH_x86 # include "bytecodeInterpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytecodeInterpreter_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytecodeInterpreter_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytecodeInterpreter_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp index f5db0b4d9d2..8adbf95acb5 100644 --- a/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp +++ b/hotspot/src/share/vm/interpreter/bytecodeInterpreter.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_INLINE_HPP #define SHARE_VM_INTERPRETER_BYTECODEINTERPRETER_INLINE_HPP @@ -46,6 +52,12 @@ #ifdef TARGET_ARCH_x86 # include "bytecodeInterpreter_x86.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytecodeInterpreter_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytecodeInterpreter_loongarch.inline.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytecodeInterpreter_aarch64.inline.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/bytecodeStream.hpp b/hotspot/src/share/vm/interpreter/bytecodeStream.hpp index b814b88d5df..e1f2421600c 100644 --- a/hotspot/src/share/vm/interpreter/bytecodeStream.hpp +++ b/hotspot/src/share/vm/interpreter/bytecodeStream.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_BYTECODESTREAM_HPP #define SHARE_VM_INTERPRETER_BYTECODESTREAM_HPP @@ -32,6 +38,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/bytecodes.cpp b/hotspot/src/share/vm/interpreter/bytecodes.cpp index ce5632ea19b..7f8e8801997 100644 --- a/hotspot/src/share/vm/interpreter/bytecodes.cpp +++ b/hotspot/src/share/vm/interpreter/bytecodes.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "interpreter/bytecodes.hpp" #include "memory/resourceArea.hpp" @@ -29,6 +35,12 @@ #ifdef TARGET_ARCH_x86 # include "bytes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/bytecodes.hpp b/hotspot/src/share/vm/interpreter/bytecodes.hpp index c3463cd76df..bdf4c487f00 100644 --- a/hotspot/src/share/vm/interpreter/bytecodes.hpp +++ b/hotspot/src/share/vm/interpreter/bytecodes.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_BYTECODES_HPP #define SHARE_VM_INTERPRETER_BYTECODES_HPP @@ -292,6 +298,12 @@ class Bytecodes: AllStatic { #ifdef TARGET_ARCH_x86 # include "bytecodes_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytecodes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytecodes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "bytecodes_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp index 6a6447503cf..f9c540fb4a1 100644 --- a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp +++ b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. 
These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_CPPINTERPRETER_HPP #define SHARE_VM_INTERPRETER_CPPINTERPRETER_HPP @@ -84,6 +90,12 @@ class CppInterpreter: public AbstractInterpreter { #ifdef TARGET_ARCH_x86 # include "cppInterpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "cppInterpreter_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "cppInterpreter_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "cppInterpreter_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp index 6a08a3f43f7..1fd19994d76 100644 --- a/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp +++ b/hotspot/src/share/vm/interpreter/cppInterpreterGenerator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_CPPINTERPRETERGENERATOR_HPP #define SHARE_VM_INTERPRETER_CPPINTERPRETERGENERATOR_HPP @@ -50,6 +56,12 @@ class CppInterpreterGenerator: public AbstractInterpreterGenerator { #ifdef TARGET_ARCH_x86 # include "cppInterpreterGenerator_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "cppInterpreterGenerator_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "cppInterpreterGenerator_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "cppInterpreterGenerator_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/interpreter.hpp b/hotspot/src/share/vm/interpreter/interpreter.hpp index ebfb68d36b4..610949f3f77 100644 --- a/hotspot/src/share/vm/interpreter/interpreter.hpp +++ b/hotspot/src/share/vm/interpreter/interpreter.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_INTERPRETER_HPP #define SHARE_VM_INTERPRETER_INTERPRETER_HPP @@ -148,6 +154,12 @@ class Interpreter: public CC_INTERP_ONLY(CppInterpreter) NOT_CC_INTERP(TemplateI #ifdef TARGET_ARCH_x86 # include "interpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "interpreter_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "interpreter_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "interpreter_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp index 1dc7cb29833..92bbe6b4407 100644 --- a/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp +++ b/hotspot/src/share/vm/interpreter/interpreterGenerator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERGENERATOR_HPP #define SHARE_VM_INTERPRETER_INTERPRETERGENERATOR_HPP @@ -44,6 +50,12 @@ InterpreterGenerator(StubQueue* _code); #ifdef TARGET_ARCH_x86 # include "interpreterGenerator_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "interpreterGenerator_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "interpreterGenerator_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "interpreterGenerator_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp index 5d2845383ca..f48622f67ef 100644 --- a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp +++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -59,6 +65,12 @@ #ifdef TARGET_ARCH_x86 # include "vm_version_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vm_version_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vm_version_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "vm_version_aarch64.hpp" #endif @@ -1290,7 +1302,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth // preparing the same method will be sure to see non-null entry & mirror. IRT_END -#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) +#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) if (src_address == dest_address) { return; diff --git a/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp b/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp index 472bf4d94cc..9a98d5559c8 100644 --- a/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp +++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP #define SHARE_VM_INTERPRETER_INTERPRETERRUNTIME_HPP @@ -156,7 +162,7 @@ class InterpreterRuntime: AllStatic { Method* method, intptr_t* from, intptr_t* to); -#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) +#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) // Popframe support (only needed on x86, AMD64 and ARM) static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); #endif @@ -165,6 +171,12 @@ class InterpreterRuntime: AllStatic { #ifdef TARGET_ARCH_x86 # include "interpreterRT_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "interpreterRT_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "interpreterRT_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "interpreterRT_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp index 5f76dca8a6f..757860f43cc 100644 --- a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp +++ b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETER_HPP #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETER_HPP @@ -190,6 +196,12 @@ class TemplateInterpreter: public AbstractInterpreter { #ifdef TARGET_ARCH_x86 # include "templateInterpreter_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "templateInterpreter_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "templateInterpreter_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "templateInterpreter_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp b/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp index bd94bd02bc3..28ca437eb2c 100644 --- a/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp +++ b/hotspot/src/share/vm/interpreter/templateInterpreterGenerator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP #define SHARE_VM_INTERPRETER_TEMPLATEINTERPRETERGENERATOR_HPP @@ -89,6 +95,12 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { #ifdef TARGET_ARCH_x86 # include "templateInterpreterGenerator_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "templateInterpreterGenerator_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "templateInterpreterGenerator_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "templateInterpreterGenerator_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/interpreter/templateTable.hpp b/hotspot/src/share/vm/interpreter/templateTable.hpp index 60d243c16a6..1b73822abd6 100644 --- a/hotspot/src/share/vm/interpreter/templateTable.hpp +++ b/hotspot/src/share/vm/interpreter/templateTable.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. 
These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_INTERPRETER_TEMPLATETABLE_HPP #define SHARE_VM_INTERPRETER_TEMPLATETABLE_HPP @@ -40,6 +46,10 @@ # include "interp_masm_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "interp_masm_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "interp_masm_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "interp_masm_loongarch_64.hpp" #endif #ifndef CC_INTERP @@ -367,6 +377,10 @@ class TemplateTable: AllStatic { # include "templateTable_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "templateTable_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "templateTable_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "templateTable_loongarch_64.hpp" #endif }; diff --git a/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp b/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp index 6d9ab39fdda..f4e9a4ca697 100644 --- a/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp +++ b/hotspot/src/share/vm/jfr/utilities/jfrBigEndian.hpp @@ -116,7 +116,7 @@ inline T JfrBigEndian::read_unaligned(const address location) { inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) return true; -#elif defined(SPARC) || defined(ARM) || defined(AARCH64) +#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(MIPS) || defined(LOONGARCH) return false; #else #warning "Unconfigured platform" diff --git a/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp b/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp index 42a8b719cd8..f08f6ee13ab 100644 --- a/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp +++ b/hotspot/src/share/vm/jfr/writers/jfrEncoders.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_JFR_WRITERS_JFRENCODERS_HPP #define SHARE_VM_JFR_WRITERS_JFRENCODERS_HPP @@ -46,6 +52,12 @@ #ifdef TARGET_ARCH_aarch64 # include "bytes_aarch64.hpp" #endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif // // The Encoding policy prescribes a template diff --git a/hotspot/src/share/vm/memory/barrierSet.hpp b/hotspot/src/share/vm/memory/barrierSet.hpp index 13ff9b2738f..081b70744d1 100644 --- a/hotspot/src/share/vm/memory/barrierSet.hpp +++ b/hotspot/src/share/vm/memory/barrierSet.hpp @@ -27,6 +27,7 @@ #include "memory/memRegion.hpp" #include "oops/oopsHierarchy.hpp" +#include "runtime/orderAccess.hpp" // This class provides the interface between a barrier implementation and // the rest of the system. @@ -95,8 +96,16 @@ class BarrierSet: public CHeapObj { // Keep this private so as to catch violations at build time. 
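
platform_supports_unaligned_reads now reports false for MIPS and LoongArch, so JfrBigEndian takes the byte-by-byte path instead of dereferencing possibly misaligned pointers. A portable byte-assembling reader of the kind that path relies on; this helper is illustrative, not JFR's actual code:

    #include <cstdint>
    #include <cstdio>

    // Big-endian 32-bit read that never dereferences a misaligned pointer.
    static uint32_t read_be_u32(const unsigned char* p) {
      return (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
             (uint32_t(p[2]) << 8)  |  uint32_t(p[3]);
    }

    int main() {
      unsigned char buf[] = { 0x12, 0x34, 0x56, 0x78 };
      std::printf("0x%08x\n", read_be_u32(buf));   // prints 0x12345678
      return 0;
    }
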
virtual void write_ref_field_pre_work( void* field, oop new_val) { guarantee(false, "Not needed"); }; protected: - virtual void write_ref_field_pre_work( oop* field, oop new_val) {}; - virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) {}; + virtual void write_ref_field_pre_work( oop* field, oop new_val) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + }; + virtual void write_ref_field_pre_work(narrowOop* field, oop new_val) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + }; public: // ...then the post-write version. @@ -132,9 +141,17 @@ class BarrierSet: public CHeapObj { // Below length is the # array elements being written virtual void write_ref_array_pre(oop* dst, int length, - bool dest_uninitialized = false) {} + bool dest_uninitialized = false) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + } virtual void write_ref_array_pre(narrowOop* dst, int length, - bool dest_uninitialized = false) {} + bool dest_uninitialized = false) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif +} // Below count is the # array elements being written, starting // at the address "start", which may not necessarily be HeapWord-aligned inline void write_ref_array(HeapWord* start, size_t count); diff --git a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp index 01e46888366..80bd1518737 100644 --- a/hotspot/src/share/vm/memory/cardTableModRefBS.hpp +++ b/hotspot/src/share/vm/memory/cardTableModRefBS.hpp @@ -316,6 +316,9 @@ class CardTableModRefBS: public ModRefBarrierSet { inline void inline_write_ref_array(MemRegion mr) { dirty_MemRegion(mr); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } protected: void write_ref_array_work(MemRegion mr) { @@ -329,7 +332,11 @@ class CardTableModRefBS: public ModRefBarrierSet { // *** Card-table-barrier-specific things. - template inline void inline_write_ref_field_pre(T* field, oop newVal) {} + template inline void inline_write_ref_field_pre(T* field, oop newVal) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + } template inline void inline_write_ref_field(T* field, oop newVal, bool release) { jbyte* byte = byte_for((void*)field); @@ -339,6 +346,9 @@ class CardTableModRefBS: public ModRefBarrierSet { } else { *byte = dirty_card; } +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif } // These are used by G1, when it uses the card table as a temporary data diff --git a/hotspot/src/share/vm/memory/cardTableRS.cpp b/hotspot/src/share/vm/memory/cardTableRS.cpp index fb33a708aec..da22acba47e 100644 --- a/hotspot/src/share/vm/memory/cardTableRS.cpp +++ b/hotspot/src/share/vm/memory/cardTableRS.cpp @@ -252,6 +252,9 @@ void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) { // cur_youngergen_and_prev_nonclean_card ==> no change. 
void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { jbyte* entry = ct_bs()->byte_for(field); +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif do { jbyte entry_val = *entry; // We put this first because it's probably the most common case. @@ -266,7 +269,12 @@ void CardTableRS::write_ref_field_gc_par(void* field, oop new_val) { jbyte new_val = cur_youngergen_and_prev_nonclean_card; jbyte res = Atomic::cmpxchg(new_val, entry, entry_val); // Did the CAS succeed? - if (res == entry_val) return; + if (res == entry_val) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + return; + } // Otherwise, retry, to see the new value. continue; } else { diff --git a/hotspot/src/share/vm/memory/cardTableRS.hpp b/hotspot/src/share/vm/memory/cardTableRS.hpp index 25884feac8b..5d4e77f2693 100644 --- a/hotspot/src/share/vm/memory/cardTableRS.hpp +++ b/hotspot/src/share/vm/memory/cardTableRS.hpp @@ -121,7 +121,14 @@ class CardTableRS: public GenRemSet { void inline_write_ref_field_gc(void* field, oop new_val) { jbyte* byte = _ct_bs->byte_for(field); - *byte = youngergen_card; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + *byte = youngergen_card; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + } void write_ref_field_gc_work(void* field, oop new_val) { inline_write_ref_field_gc(field, new_val); diff --git a/hotspot/src/share/vm/memory/metaspace.cpp b/hotspot/src/share/vm/memory/metaspace.cpp index fb0564ac276..9cec7d43750 100644 --- a/hotspot/src/share/vm/memory/metaspace.cpp +++ b/hotspot/src/share/vm/memory/metaspace.cpp @@ -21,6 +21,13 @@ * questions. * */ + +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2021 Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "gc_interface/collectedHeap.hpp" #include "memory/allocation.hpp" @@ -3065,12 +3072,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // Don't use large pages for the class space. bool large_pages = false; -#ifndef AARCH64 +#if !defined(AARCH64) && !defined(MIPS64) && !defined(LOONGARCH) ReservedSpace metaspace_rs = ReservedSpace(compressed_class_space_size(), _reserve_alignment, large_pages, requested_addr, 0); -#else // AARCH64 +#else // defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH) ReservedSpace metaspace_rs; // Our compressed klass pointers may fit nicely into the lower 32 @@ -3107,7 +3114,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a } } -#endif // AARCH64 +#endif // defined(AARCH64) || defined(MIPS64) || defined(LOONGARCH) if (!metaspace_rs.is_reserved()) { #if INCLUDE_CDS diff --git a/hotspot/src/share/vm/oops/constantPool.hpp b/hotspot/src/share/vm/oops/constantPool.hpp index ec111df04eb..6c0607105c1 100644 --- a/hotspot/src/share/vm/oops/constantPool.hpp +++ b/hotspot/src/share/vm/oops/constantPool.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_OOPS_CONSTANTPOOLOOP_HPP #define SHARE_VM_OOPS_CONSTANTPOOLOOP_HPP @@ -50,6 +56,13 @@ #ifdef TARGET_ARCH_ppc # include "bytes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif + // A constantPool is an array containing class constants as described in the // class file. diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp index acef3348499..23fc0b99881 100644 --- a/hotspot/src/share/vm/oops/klass.hpp +++ b/hotspot/src/share/vm/oops/klass.hpp @@ -32,6 +32,9 @@ #include "oops/klassPS.hpp" #include "oops/metadata.hpp" #include "oops/oop.hpp" +#if defined MIPS || defined LOONGARCH +#include "runtime/orderAccess.hpp" +#endif #include "utilities/accessFlags.hpp" #include "utilities/macros.hpp" #if INCLUDE_ALL_GCS @@ -289,8 +292,18 @@ class Klass : public Metadata { // The Klasses are not placed in the Heap, so the Card Table or // the Mod Union Table can't be used to mark when klasses have modified oops. // The CT and MUT bits saves this information for the individual Klasses. - void record_modified_oops() { _modified_oops = 1; } - void clear_modified_oops() { _modified_oops = 0; } + void record_modified_oops() { + _modified_oops = 1; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + } + void clear_modified_oops() { + _modified_oops = 0; +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) OrderAccess::fence(); +#endif + } bool has_modified_oops() { return _modified_oops == 1; } void accumulate_modified_oops() { if (has_modified_oops()) _accumulated_modified_oops = 1; } diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp index 0678c6b3fbb..1cb20e351f6 100644 --- a/hotspot/src/share/vm/oops/oop.hpp +++ b/hotspot/src/share/vm/oops/oop.hpp @@ -72,7 +72,13 @@ class oopDesc { markOop mark() const { return _mark; } markOop* mark_addr() const { return (markOop*) &_mark; } - void set_mark(volatile markOop m) { _mark = m; } + void set_mark(volatile markOop m) { +#if (defined MIPS || defined LOONGARCH) && !defined ZERO + if (UseSyncLevel >= 2000) release_set_mark(m); + else +#endif + _mark = m; + } void release_set_mark(markOop m); markOop cas_set_mark(markOop new_mark, markOop old_mark); diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp index beec739d388..8660c1e3312 100644 --- a/hotspot/src/share/vm/oops/oop.inline.hpp +++ b/hotspot/src/share/vm/oops/oop.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP #define SHARE_VM_OOPS_OOP_INLINE_HPP @@ -60,6 +66,12 @@ #ifdef TARGET_ARCH_ppc # include "bytes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif // Implementation of all inlined member functions defined in oop.hpp // We need a separate file to avoid circular references diff --git a/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp b/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp index 8a4603944ea..b28bb99189a 100644 --- a/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp +++ b/hotspot/src/share/vm/oops/oop.pcgc.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP #define SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP @@ -75,7 +81,7 @@ inline oop oopDesc::forward_to_atomic(oop p) { // forwarding pointer. oldMark = curMark; } - return forwardee(); + return (oop) oldMark->decode_pointer(); } #endif // SHARE_VM_OOPS_OOP_PCGC_INLINE_HPP diff --git a/hotspot/src/share/vm/opto/buildOopMap.cpp b/hotspot/src/share/vm/opto/buildOopMap.cpp index 91642f1d7dd..5df185df04c 100644 --- a/hotspot/src/share/vm/opto/buildOopMap.cpp +++ b/hotspot/src/share/vm/opto/buildOopMap.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "compiler/oopMap.hpp" #include "opto/addnode.hpp" @@ -50,6 +56,12 @@ #ifdef TARGET_ARCH_ppc # include "vmreg_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vmreg_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vmreg_loongarch.inline.hpp" +#endif // The functions in this file builds OopMaps after all scheduling is done. // diff --git a/hotspot/src/share/vm/opto/bytecodeInfo.cpp b/hotspot/src/share/vm/opto/bytecodeInfo.cpp index 7fd615d35f2..ad472e87221 100644 --- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp +++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp @@ -361,9 +361,20 @@ bool InlineTree::try_to_inline(ciMethod* callee_method, ciMethod* caller_method, } else if (forced_inline()) { // Inlining was forced by CompilerOracle, ciReplay or annotation } else if (profile.count() == 0) { +#ifndef MIPS // don't inline unreached call sites set_msg("call site not reached"); return false; +#else + ciMethodBlocks* blocks = caller_method->get_method_blocks(); + // Check if the call site belongs to a start block: + // call sites in a start block must be reached before. + if (blocks->block_containing(0) != blocks->block_containing(jvms->bci())) { + // don't inline unreached call sites + set_msg("call site not reached"); + return false; + } +#endif } } diff --git a/hotspot/src/share/vm/opto/c2_globals.hpp b/hotspot/src/share/vm/opto/c2_globals.hpp index 82d2efef92c..d373b20456f 100644 --- a/hotspot/src/share/vm/opto/c2_globals.hpp +++ b/hotspot/src/share/vm/opto/c2_globals.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_OPTO_C2_GLOBALS_HPP #define SHARE_VM_OPTO_C2_GLOBALS_HPP @@ -35,6 +41,12 @@ #ifdef TARGET_ARCH_sparc # include "c2_globals_sparc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "c2_globals_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "c2_globals_loongarch.hpp" +#endif #ifdef TARGET_ARCH_arm # include "c2_globals_arm.hpp" #endif diff --git a/hotspot/src/share/vm/opto/c2compiler.cpp b/hotspot/src/share/vm/opto/c2compiler.cpp index 137f49600d9..f689d64a386 100644 --- a/hotspot/src/share/vm/opto/c2compiler.cpp +++ b/hotspot/src/share/vm/opto/c2compiler.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "opto/c2compiler.hpp" #include "opto/runtime.hpp" @@ -39,6 +45,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif // register information defined by ADLC diff --git a/hotspot/src/share/vm/opto/chaitin.hpp b/hotspot/src/share/vm/opto/chaitin.hpp index de6d443cd30..0b27dc9335e 100644 --- a/hotspot/src/share/vm/opto/chaitin.hpp +++ b/hotspot/src/share/vm/opto/chaitin.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_OPTO_CHAITIN_HPP #define SHARE_VM_OPTO_CHAITIN_HPP @@ -136,8 +142,12 @@ class LRG : public ResourceObj { // Number of registers this live range uses when it colors private: +#ifdef LOONGARCH64 + uint16_t _num_regs; +#else uint8 _num_regs; // 2 for Longs and Doubles, 1 for all else // except _num_regs is kill count for fat_proj +#endif public: int num_regs() const { return _num_regs; } void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } @@ -145,7 +155,11 @@ class LRG : public ResourceObj { private: // Number of physical registers this live range uses when it colors // Architecture and register-set dependent +#ifdef LOONGARCH64 + uint16_t _reg_pressure; +#else uint8 _reg_pressure; +#endif public: void set_reg_pressure(int i) { _reg_pressure = i; } int reg_pressure() const { return _reg_pressure; } diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp index ae22ba84d9f..9004dc0d727 100644 --- a/hotspot/src/share/vm/opto/compile.cpp +++ b/hotspot/src/share/vm/opto/compile.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" @@ -81,6 +87,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif // -------------------- Compile::mach_constant_base_node ----------------------- diff --git a/hotspot/src/share/vm/opto/compile.hpp b/hotspot/src/share/vm/opto/compile.hpp index b4f4cfefed3..d263ee2fc45 100644 --- a/hotspot/src/share/vm/opto/compile.hpp +++ b/hotspot/src/share/vm/opto/compile.hpp @@ -1025,7 +1025,7 @@ class Compile : public Phase { bool in_scratch_emit_size() const { return _in_scratch_emit_size; } enum ScratchBufferBlob { - MAX_inst_size = 1024, + MAX_inst_size = 1024 MIPS64_ONLY(* 2) LOONGARCH64_ONLY(*2), MAX_locs_size = 128, // number of relocInfo elements MAX_const_size = 128, MAX_stubs_size = 128 diff --git a/hotspot/src/share/vm/opto/gcm.cpp b/hotspot/src/share/vm/opto/gcm.cpp index f51484efb0f..12457b7c344 100644 --- a/hotspot/src/share/vm/opto/gcm.cpp +++ b/hotspot/src/share/vm/opto/gcm.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "libadt/vectset.hpp" #include "memory/allocation.inline.hpp" @@ -49,6 +55,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif diff --git a/hotspot/src/share/vm/opto/lcm.cpp b/hotspot/src/share/vm/opto/lcm.cpp index c6178a715b8..2d492568d96 100644 --- a/hotspot/src/share/vm/opto/lcm.cpp +++ b/hotspot/src/share/vm/opto/lcm.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "memory/allocation.inline.hpp" #include "opto/block.hpp" @@ -44,6 +50,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif // Optimization - Graph Style diff --git a/hotspot/src/share/vm/opto/locknode.hpp b/hotspot/src/share/vm/opto/locknode.hpp index b320f6bfb2f..4bfb0ff072f 100644 --- a/hotspot/src/share/vm/opto/locknode.hpp +++ b/hotspot/src/share/vm/opto/locknode.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_OPTO_LOCKNODE_HPP #define SHARE_VM_OPTO_LOCKNODE_HPP @@ -42,6 +48,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif //------------------------------BoxLockNode------------------------------------ diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp index 6660b4b467b..dec46861d3a 100644 --- a/hotspot/src/share/vm/opto/matcher.cpp +++ b/hotspot/src/share/vm/opto/matcher.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "memory/allocation.inline.hpp" #include "opto/addnode.hpp" @@ -52,6 +58,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif OptoReg::Name OptoReg::c_frame_pointer; diff --git a/hotspot/src/share/vm/opto/output.cpp b/hotspot/src/share/vm/opto/output.cpp index 6032b72a9b2..7fb4dea28e9 100644 --- a/hotspot/src/share/vm/opto/output.cpp +++ b/hotspot/src/share/vm/opto/output.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2021. These + * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "asm/assembler.inline.hpp" #include "code/compiledIC.hpp" @@ -844,6 +850,27 @@ void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) { // Add the safepoint in the DebugInfoRecorder if( !mach->is_MachCall() ) { mcall = NULL; +#if defined(MIPS) || defined(LOONGARCH) + // safepoint_pc_offset should point to tha last instruction in safePoint. + // In X86 and sparc, their safePoints only contain one instruction. + // However, we should add current_offset with the size of safePoint in MIPS. + // 0x2d6ff22c: lw s2, 0x14(s2) + // last_pd->pc_offset()=308, pc_offset=304, bci=64 + // last_pd->pc_offset()=312, pc_offset=312, bci=64 + // src/hotspot/share/code/debugInfoRec.cpp:295, assert(last_pd->pc_offset() == pc_offset, "must be last pc") + // + // ;; Safepoint: + // ---> pc_offset=304 + // 0x2d6ff230: lui at, 0x2b7a ; OopMap{s2=Oop s5=Oop t4=Oop off=308} + // ;*goto + // ; - java.util.Hashtable::get@64 (line 353) + // ---> last_pd(308) + // 0x2d6ff234: lw at, 0xffffc100(at) ;*goto + // ; - java.util.Hashtable::get@64 (line 353) + // ; {poll} + // 0x2d6ff238: addiu s0, zero, 0x0 + safepoint_pc_offset += sfn->size(_regalloc) - 4; +#endif debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); } else { mcall = mach->as_MachCall(); @@ -1502,6 +1529,22 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { DEBUG_ONLY( uint instr_offset = cb->insts_size(); ) n->emit(*cb, _regalloc); current_offset = cb->insts_size(); +#if defined(MIPS) || defined(LOONGARCH) + if (!n->is_Proj() && (cb->insts()->end() != badAddress)) { + // For MIPS, the first instruction of the previous node (usually a instruction sequence) sometime + // is not the instruction which access memory. 
adjust is needed. previous_offset points to the + // instruction which access memory. Instruction size is 4. cb->insts_size() and + // cb->insts()->end() are the location of current instruction. + int adjust = 4; + NativeInstruction* inst = (NativeInstruction*) (cb->insts()->end() - 4); + if (inst->is_sync()) { + // a sync may be the last instruction, see store_B_immI_enc_sync + adjust += 4; + inst = (NativeInstruction*) (cb->insts()->end() - 8); + } + previous_offset = current_offset - adjust; + } +#endif // Above we only verified that there is enough space in the instruction section. // However, the instruction may emit stubs that cause code buffer expansion. diff --git a/hotspot/src/share/vm/opto/output.hpp b/hotspot/src/share/vm/opto/output.hpp index ba728413632..37f954de9bf 100644 --- a/hotspot/src/share/vm/opto/output.hpp +++ b/hotspot/src/share/vm/opto/output.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_OPTO_OUTPUT_HPP #define SHARE_VM_OPTO_OUTPUT_HPP @@ -41,6 +47,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif class Arena; diff --git a/hotspot/src/share/vm/opto/regmask.cpp b/hotspot/src/share/vm/opto/regmask.cpp index 352ccfb9d95..9a656d03ee4 100644 --- a/hotspot/src/share/vm/opto/regmask.cpp +++ b/hotspot/src/share/vm/opto/regmask.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "opto/compile.hpp" #include "opto/regmask.hpp" @@ -39,6 +45,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif #define RM_SIZE _RM_SIZE /* a constant private to the class RegMask */ diff --git a/hotspot/src/share/vm/opto/regmask.hpp b/hotspot/src/share/vm/opto/regmask.hpp index 5ceebb3fb86..6d08b687316 100644 --- a/hotspot/src/share/vm/opto/regmask.hpp +++ b/hotspot/src/share/vm/opto/regmask.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_OPTO_REGMASK_HPP #define SHARE_VM_OPTO_REGMASK_HPP @@ -42,6 +48,10 @@ # include "adfiles/adGlobals_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/adGlobals_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/adGlobals_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/adGlobals_loongarch_64.hpp" #endif // Some fun naming (textual) substitutions: diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp index a43b37f2c56..f2bcafa2c52 100644 --- a/hotspot/src/share/vm/opto/runtime.cpp +++ b/hotspot/src/share/vm/opto/runtime.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -82,6 +88,10 @@ # include "adfiles/ad_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" #endif diff --git a/hotspot/src/share/vm/opto/type.cpp b/hotspot/src/share/vm/opto/type.cpp index 58572f137db..299d48b12ab 100644 --- a/hotspot/src/share/vm/opto/type.cpp +++ b/hotspot/src/share/vm/opto/type.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2022, These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "ci/ciMethodData.hpp" #include "ci/ciTypeFlow.hpp" @@ -68,6 +74,16 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { { Bad, T_ILLEGAL, "vectord:", false, Op_RegD, relocInfo::none }, // VectorD { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY +#elif defined(MIPS64) + { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD + { Bad, T_ILLEGAL, "vectorx:", false, 0, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY +#elif defined(LOONGARCH64) + { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS + { Bad, T_ILLEGAL, "vectord:", false, 0, relocInfo::none }, // VectorD + { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX + { Bad, T_ILLEGAL, "vectory:", false, Op_VecY, relocInfo::none }, // VectorY #elif defined(PPC64) { Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD diff --git a/hotspot/src/share/vm/prims/jniCheck.cpp b/hotspot/src/share/vm/prims/jniCheck.cpp index 593ca8a1e34..82813b71fe2 100644 --- a/hotspot/src/share/vm/prims/jniCheck.cpp +++ b/hotspot/src/share/vm/prims/jniCheck.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -55,6 +61,12 @@ #ifdef TARGET_ARCH_ppc # include "jniTypes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "jniTypes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "jniTypes_loongarch.hpp" +#endif // Complain every extra number of unplanned local refs #define CHECK_JNI_LOCAL_REF_CAP_WARN_THRESHOLD 32 diff --git a/hotspot/src/share/vm/prims/jni_md.h b/hotspot/src/share/vm/prims/jni_md.h index 6209a664496..271715d4a29 100644 --- a/hotspot/src/share/vm/prims/jni_md.h +++ b/hotspot/src/share/vm/prims/jni_md.h @@ -22,6 +22,12 @@ * or visit www.oracle.com if you need additional information or have any * questions. */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + /* Switch to the correct jni_md.h file without reliance on -I options. */ #ifdef TARGET_ARCH_x86 @@ -42,6 +48,12 @@ #ifdef TARGET_ARCH_ppc # include "jni_ppc.h" #endif +#ifdef TARGET_ARCH_mips +# include "jni_mips.h" +#endif +#ifdef TARGET_ARCH_loongarch +# include "jni_loongarch.h" +#endif /* diff --git a/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp b/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp index ab31d0d91e5..0d8570b7649 100644 --- a/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp +++ b/hotspot/src/share/vm/prims/jvmtiClassFileReconstituter.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/symbolTable.hpp" #include "interpreter/bytecodeStream.hpp" @@ -46,6 +52,12 @@ #ifdef TARGET_ARCH_ppc # include "bytes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif // FIXME: add Deprecated attribute // FIXME: fix Synthetic attribute // FIXME: per Serguei, add error return handling for ConstantPool::copy_cpool_bytes() diff --git a/hotspot/src/share/vm/prims/methodHandles.hpp b/hotspot/src/share/vm/prims/methodHandles.hpp index db6e06180d1..841082859a4 100644 --- a/hotspot/src/share/vm/prims/methodHandles.hpp +++ b/hotspot/src/share/vm/prims/methodHandles.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_PRIMS_METHODHANDLES_HPP #define SHARE_VM_PRIMS_METHODHANDLES_HPP @@ -198,6 +204,13 @@ class MethodHandles: AllStatic { #ifdef TARGET_ARCH_ppc # include "methodHandles_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "methodHandles_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "methodHandles_loongarch.hpp" +#endif + // Tracing static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN; diff --git a/hotspot/src/share/vm/runtime/atomic.inline.hpp b/hotspot/src/share/vm/runtime/atomic.inline.hpp index 222f29cbf41..7c7c6edb27f 100644 --- a/hotspot/src/share/vm/runtime/atomic.inline.hpp +++ b/hotspot/src/share/vm/runtime/atomic.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP #define SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP @@ -31,6 +37,12 @@ #ifdef TARGET_OS_ARCH_linux_x86 # include "atomic_linux_x86.inline.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "atomic_linux_mips.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "atomic_linux_loongarch.inline.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_sparc # include "atomic_linux_sparc.inline.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/deoptimization.cpp b/hotspot/src/share/vm/runtime/deoptimization.cpp index f91afdc4165..36a924fd4fb 100644 --- a/hotspot/src/share/vm/runtime/deoptimization.cpp +++ b/hotspot/src/share/vm/runtime/deoptimization.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "code/debugInfoRec.hpp" @@ -68,6 +74,12 @@ #ifdef TARGET_ARCH_ppc # include "vmreg_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vmreg_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vmreg_loongarch.inline.hpp" +#endif #ifdef COMPILER2 #if defined AD_MD_HPP # include AD_MD_HPP @@ -84,6 +96,12 @@ #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/ad_ppc_64.hpp" #endif +#ifdef TARGET_ARCH_MODEL_mips_64 +# include "adfiles/ad_mips_64.hpp" +#endif +#ifdef TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/ad_loongarch_64.hpp" +#endif #endif // COMPILER2 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC diff --git a/hotspot/src/share/vm/runtime/dtraceJSDT.hpp b/hotspot/src/share/vm/runtime/dtraceJSDT.hpp index db568def348..490c5f5a4e9 100644 --- a/hotspot/src/share/vm/runtime/dtraceJSDT.hpp +++ b/hotspot/src/share/vm/runtime/dtraceJSDT.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_DTRACEJSDT_HPP #define SHARE_VM_RUNTIME_DTRACEJSDT_HPP @@ -44,6 +50,12 @@ #ifdef TARGET_ARCH_ppc # include "nativeInst_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +#endif class RegisteredProbes; typedef jlong OpaqueProbes; diff --git a/hotspot/src/share/vm/runtime/frame.cpp b/hotspot/src/share/vm/runtime/frame.cpp index 338b7ad3a7b..5a161133baf 100644 --- a/hotspot/src/share/vm/runtime/frame.cpp +++ b/hotspot/src/share/vm/runtime/frame.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "compiler/abstractCompiler.hpp" #include "compiler/disassembler.hpp" @@ -64,6 +70,13 @@ #ifdef TARGET_ARCH_ppc # include "nativeInst_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +#endif + PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC diff --git a/hotspot/src/share/vm/runtime/frame.hpp b/hotspot/src/share/vm/runtime/frame.hpp index 2d80ecc2085..4a9e6edb541 100644 --- a/hotspot/src/share/vm/runtime/frame.hpp +++ b/hotspot/src/share/vm/runtime/frame.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_FRAME_HPP #define SHARE_VM_RUNTIME_FRAME_HPP @@ -45,6 +51,10 @@ # include "adfiles/adGlobals_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/adGlobals_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/adGlobals_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/adGlobals_loongarch_64.hpp" #endif #endif // COMPILER2 #ifdef TARGET_ARCH_zero @@ -489,6 +499,12 @@ class frame VALUE_OBJ_CLASS_SPEC { #ifdef TARGET_ARCH_x86 # include "frame_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "frame_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "frame_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "frame_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/frame.inline.hpp b/hotspot/src/share/vm/runtime/frame.inline.hpp index 710b82306ab..704cc8df8f2 100644 --- a/hotspot/src/share/vm/runtime/frame.inline.hpp +++ b/hotspot/src/share/vm/runtime/frame.inline.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_FRAME_INLINE_HPP #define SHARE_VM_RUNTIME_FRAME_INLINE_HPP @@ -49,6 +55,12 @@ #ifdef TARGET_ARCH_ppc # include "jniTypes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "jniTypes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "jniTypes_loongarch.hpp" +#endif #ifdef TARGET_ARCH_zero # include "entryFrame_zero.hpp" # include "fakeStubFrame_zero.hpp" @@ -115,6 +127,12 @@ inline oop* frame::interpreter_frame_temp_oop_addr() const { #ifdef TARGET_ARCH_ppc # include "frame_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "frame_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "frame_loongarch.inline.hpp" +#endif #endif // SHARE_VM_RUNTIME_FRAME_INLINE_HPP diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 23ce8af5696..f36137aabfb 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -55,6 +55,12 @@ #ifdef TARGET_ARCH_ppc # include "globals_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "globals_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "globals_loongarch.hpp" +#endif #ifdef TARGET_OS_FAMILY_linux # include "globals_linux.hpp" #endif @@ -79,6 +85,12 @@ #ifdef TARGET_OS_ARCH_linux_sparc # include "globals_linux_sparc.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "globals_linux_mips.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "globals_linux_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_zero # include "globals_linux_zero.hpp" #endif @@ -116,6 +128,12 @@ #ifdef TARGET_ARCH_sparc # include "c1_globals_sparc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "c1_globals_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "c1_globals_loongarch.hpp" +#endif #ifdef TARGET_ARCH_arm # include "c1_globals_arm.hpp" #endif @@ -148,6 +166,12 @@ #ifdef TARGET_ARCH_sparc # include "c2_globals_sparc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "c2_globals_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "c2_globals_loongarch.hpp" +#endif #ifdef TARGET_ARCH_arm # include "c2_globals_arm.hpp" #endif @@ -3209,7 +3233,7 @@ class CommandLineFlags { product(uintx, InitialHeapSize, 0, \ "Initial heap size (in bytes); zero means use ergonomics") \ \ - product(uintx, MaxHeapSize, ScaleForWordSize(96*M), \ + product(uintx, MaxHeapSize, ScaleForWordSize(MIPS64_ONLY(1500) NOT_MIPS64(96) *M), \ "Maximum heap size (in bytes)") \ \ product(uintx, OldSize, ScaleForWordSize(4*M), \ diff --git a/hotspot/src/share/vm/runtime/icache.hpp b/hotspot/src/share/vm/runtime/icache.hpp index ba81a06ff59..9c0cfdb7d70 100644 --- a/hotspot/src/share/vm/runtime/icache.hpp +++ b/hotspot/src/share/vm/runtime/icache.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_ICACHE_HPP #define SHARE_VM_RUNTIME_ICACHE_HPP @@ -86,7 +92,12 @@ class AbstractICache : AllStatic { #ifdef TARGET_ARCH_ppc # include "icache_ppc.hpp" #endif - +#ifdef TARGET_ARCH_mips +# include "icache_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "icache_loongarch.hpp" +#endif class ICacheStubGenerator : public StubCodeGenerator { diff --git a/hotspot/src/share/vm/runtime/java.cpp b/hotspot/src/share/vm/runtime/java.cpp index 0a263b017cf..9ba0decaae7 100644 --- a/hotspot/src/share/vm/runtime/java.cpp +++ b/hotspot/src/share/vm/runtime/java.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/classLoader.hpp" #include "classfile/symbolTable.hpp" @@ -84,6 +90,12 @@ #ifdef TARGET_ARCH_ppc # include "vm_version_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vm_version_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vm_version_loongarch.hpp" +#endif #if INCLUDE_ALL_GCS #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp" #include "gc_implementation/parallelScavenge/psScavenge.hpp" diff --git a/hotspot/src/share/vm/runtime/javaCalls.hpp b/hotspot/src/share/vm/runtime/javaCalls.hpp index 6126bbe75ef..1747e2b2ee7 100644 --- a/hotspot/src/share/vm/runtime/javaCalls.hpp +++ b/hotspot/src/share/vm/runtime/javaCalls.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_JAVACALLS_HPP #define SHARE_VM_RUNTIME_JAVACALLS_HPP @@ -49,6 +55,12 @@ #ifdef TARGET_ARCH_ppc # include "jniTypes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "jniTypes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "jniTypes_loongarch.hpp" +#endif // A JavaCallWrapper is constructed before each JavaCall and destructed after the call. // Its purpose is to allocate/deallocate a new handle block and to save/restore the last diff --git a/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp b/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp index 129a01e293f..c2b1b2e6c3b 100644 --- a/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp +++ b/hotspot/src/share/vm/runtime/javaFrameAnchor.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_JAVAFRAMEANCHOR_HPP #define SHARE_VM_RUNTIME_JAVAFRAMEANCHOR_HPP @@ -80,6 +86,12 @@ friend class JavaCallWrapper; #ifdef TARGET_ARCH_x86 # include "javaFrameAnchor_x86.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "javaFrameAnchor_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "javaFrameAnchor_loongarch.hpp" +#endif #ifdef TARGET_ARCH_aarch64 # include "javaFrameAnchor_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/os.cpp b/hotspot/src/share/vm/runtime/os.cpp index 96eed036705..28c78409e7e 100644 --- a/hotspot/src/share/vm/runtime/os.cpp +++ b/hotspot/src/share/vm/runtime/os.cpp @@ -1122,7 +1122,8 @@ bool os::is_first_C_frame(frame* fr) { uintptr_t old_fp = (uintptr_t)fr->link(); if ((old_fp & fp_align_mask) != 0) return true; - if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp) return true; + // The check for old_fp and ufp is harmful on MIPS due to its special ABI. + if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_MIPS64(|| old_fp == ufp)) return true; // stack grows downwards; if old_fp is below current fp or if the stack // frame is too large, either the stack is corrupted or fp is not saved diff --git a/hotspot/src/share/vm/runtime/os.hpp b/hotspot/src/share/vm/runtime/os.hpp index 836c231b03e..0ca6e645982 100644 --- a/hotspot/src/share/vm/runtime/os.hpp +++ b/hotspot/src/share/vm/runtime/os.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_OS_HPP #define SHARE_VM_RUNTIME_OS_HPP @@ -857,6 +863,12 @@ class os: AllStatic { #ifdef TARGET_OS_ARCH_linux_x86 # include "os_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "os_linux_mips.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "os_linux_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_aarch64 # include "os_linux_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/prefetch.inline.hpp b/hotspot/src/share/vm/runtime/prefetch.inline.hpp index f4e30de34d9..fec16f842c9 100644 --- a/hotspot/src/share/vm/runtime/prefetch.inline.hpp +++ b/hotspot/src/share/vm/runtime/prefetch.inline.hpp @@ -46,6 +46,12 @@ #ifdef TARGET_OS_ARCH_linux_ppc # include "prefetch_linux_ppc.inline.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "prefetch_linux_mips.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "prefetch_linux_loongarch.inline.hpp" +#endif // Solaris #ifdef TARGET_OS_ARCH_solaris_x86 diff --git a/hotspot/src/share/vm/runtime/registerMap.hpp b/hotspot/src/share/vm/runtime/registerMap.hpp index 67ef212d659..1e26dfcba4e 100644 --- a/hotspot/src/share/vm/runtime/registerMap.hpp +++ b/hotspot/src/share/vm/runtime/registerMap.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_REGISTERMAP_HPP #define SHARE_VM_RUNTIME_REGISTERMAP_HPP @@ -45,6 +51,12 @@ #ifdef TARGET_ARCH_ppc # include "register_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "register_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "register_loongarch.hpp" +#endif class JavaThread; @@ -156,6 +168,12 @@ class RegisterMap : public StackObj { #ifdef TARGET_ARCH_ppc # include "registerMap_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "registerMap_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "registerMap_loongarch.hpp" +#endif }; diff --git a/hotspot/src/share/vm/runtime/relocator.hpp b/hotspot/src/share/vm/runtime/relocator.hpp index bb19c75fe65..53f3c9f6bdb 100644 --- a/hotspot/src/share/vm/runtime/relocator.hpp +++ b/hotspot/src/share/vm/runtime/relocator.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_RELOCATOR_HPP #define SHARE_VM_RUNTIME_RELOCATOR_HPP @@ -45,6 +51,12 @@ #ifdef TARGET_ARCH_ppc # include "bytes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "bytes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "bytes_loongarch.hpp" +#endif // This code has been converted from the 1.1E java virtual machine // Thanks to the JavaTopics group for using the code diff --git a/hotspot/src/share/vm/runtime/safepoint.cpp b/hotspot/src/share/vm/runtime/safepoint.cpp index 440617c8026..be0e4dd13c6 100644 --- a/hotspot/src/share/vm/runtime/safepoint.cpp +++ b/hotspot/src/share/vm/runtime/safepoint.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/symbolTable.hpp" #include "classfile/systemDictionary.hpp" @@ -78,6 +84,14 @@ # include "nativeInst_ppc.hpp" # include "vmreg_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +# include "vmreg_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +# include "vmreg_loongarch.inline.hpp" +#endif #if INCLUDE_ALL_GCS #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp" #include "gc_implementation/shared/suspendibleThreadSet.hpp" diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp index 5f540247f9b..abcd6066b9c 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -82,6 +88,15 @@ # include "nativeInst_ppc.hpp" # include "vmreg_ppc.inline.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +# include "vmreg_mips.inline.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +# include "vmreg_loongarch.inline.hpp" +#endif + #ifdef COMPILER1 #include "c1/c1_Runtime1.hpp" #endif @@ -220,7 +235,6 @@ void SharedRuntime::print_ic_miss_histogram() { } } #endif // PRODUCT - #if INCLUDE_ALL_GCS // G1 write-barrier pre: executed before a pointer store. diff --git a/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp b/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp index 37880d8a5c5..3987880b16b 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntimeTrig.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020, These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "prims/jni.h" #include "runtime/interfaceSupport.hpp" @@ -534,6 +540,15 @@ static SAFEBUF int __ieee754_rem_pio2(double x, double *y) { * then 3 2 * sin(x) = x + (S1*x + (x *(r-y/2)+y)) */ +#if defined(MIPS) || defined(LOONGARCH) +// TODO: LA +#undef S1 +#undef S2 +#undef S3 +#undef S4 +#undef S5 +#undef S6 +#endif static const double S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ diff --git a/hotspot/src/share/vm/runtime/stackValueCollection.cpp b/hotspot/src/share/vm/runtime/stackValueCollection.cpp index 87747683118..fe81c1bfd81 100644 --- a/hotspot/src/share/vm/runtime/stackValueCollection.cpp +++ b/hotspot/src/share/vm/runtime/stackValueCollection.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "runtime/stackValueCollection.hpp" #ifdef TARGET_ARCH_x86 @@ -42,6 +48,12 @@ #ifdef TARGET_ARCH_ppc # include "jniTypes_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "jniTypes_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "jniTypes_loongarch.hpp" +#endif PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC diff --git a/hotspot/src/share/vm/runtime/statSampler.cpp b/hotspot/src/share/vm/runtime/statSampler.cpp index 41f469622f0..3b430890620 100644 --- a/hotspot/src/share/vm/runtime/statSampler.cpp +++ b/hotspot/src/share/vm/runtime/statSampler.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020 Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -51,6 +57,12 @@ #ifdef TARGET_ARCH_ppc # include "vm_version_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vm_version_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vm_version_loongarch.hpp" +#endif // -------------------------------------------------------- // StatSamplerTask diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp index e18b9127df9..9bf933762a4 100644 --- a/hotspot/src/share/vm/runtime/stubRoutines.hpp +++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_STUBROUTINES_HPP #define SHARE_VM_RUNTIME_STUBROUTINES_HPP @@ -49,6 +55,12 @@ #ifdef TARGET_ARCH_ppc # include "nativeInst_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "nativeInst_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "nativeInst_loongarch.hpp" +#endif // StubRoutines provides entry points to assembly routines used by // compiled code and the run-time system. Platform-specific entry @@ -116,6 +128,10 @@ class StubRoutines: AllStatic { # include "stubRoutines_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "stubRoutines_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "stubRoutines_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "stubRoutines_loongarch_64.hpp" #endif static jint _verify_oop_count; diff --git a/hotspot/src/share/vm/runtime/thread.cpp b/hotspot/src/share/vm/runtime/thread.cpp index e6586c40cbc..3db678ff482 100644 --- a/hotspot/src/share/vm/runtime/thread.cpp +++ b/hotspot/src/share/vm/runtime/thread.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/classLoader.hpp" #include "classfile/javaClasses.hpp" diff --git a/hotspot/src/share/vm/runtime/thread.hpp b/hotspot/src/share/vm/runtime/thread.hpp index 1c19ab72909..aa69217eeff 100644 --- a/hotspot/src/share/vm/runtime/thread.hpp +++ b/hotspot/src/share/vm/runtime/thread.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_RUNTIME_THREAD_HPP #define SHARE_VM_RUNTIME_THREAD_HPP @@ -1711,6 +1717,12 @@ class JavaThread: public Thread { #ifdef TARGET_OS_ARCH_linux_x86 # include "thread_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "thread_linux_mips.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "thread_linux_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_aarch64 # include "thread_linux_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/threadLocalStorage.hpp b/hotspot/src/share/vm/runtime/threadLocalStorage.hpp index 58c1afc810e..0938b2eddae 100644 --- a/hotspot/src/share/vm/runtime/threadLocalStorage.hpp +++ b/hotspot/src/share/vm/runtime/threadLocalStorage.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_RUNTIME_THREADLOCALSTORAGE_HPP #define SHARE_VM_RUNTIME_THREADLOCALSTORAGE_HPP @@ -51,6 +57,12 @@ class ThreadLocalStorage : AllStatic { #ifdef TARGET_OS_ARCH_linux_x86 # include "threadLS_linux_x86.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "threadLS_linux_mips.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "threadLS_linux_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_aarch64 # include "threadLS_linux_aarch64.hpp" #endif diff --git a/hotspot/src/share/vm/runtime/virtualspace.cpp b/hotspot/src/share/vm/runtime/virtualspace.cpp index 66392b75f13..5ced38d8389 100644 --- a/hotspot/src/share/vm/runtime/virtualspace.cpp +++ b/hotspot/src/share/vm/runtime/virtualspace.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -147,6 +148,15 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large, bool special = large && !os::can_commit_large_page_memory(); char* base = NULL; +#if defined MIPS && !defined ZERO + size_t opt_reg_addr = 5 * os::Linux::page_size(); + static int code_cache_init_flag = 1; + if (UseCodeCacheAllocOpt && code_cache_init_flag && executable) { + code_cache_init_flag = 0; + requested_address = (char*) opt_reg_addr; + } +#endif + if (requested_address != 0) { requested_address -= noaccess_prefix; // adjust requested address assert(requested_address != NULL, "huge noaccess prefix?"); @@ -193,6 +203,12 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large, if (failed_to_reserve_as_requested(base, requested_address, size, false)) { // OS ignored requested address. Try different address. base = NULL; +#if defined MIPS && !defined ZERO + if (UseCodeCacheAllocOpt && requested_address == (char*) opt_reg_addr) { + requested_address = NULL; + base = os::reserve_memory(size, NULL, alignment); + } +#endif } } else { base = os::reserve_memory(size, NULL, alignment); diff --git a/hotspot/src/share/vm/runtime/vmStructs.cpp b/hotspot/src/share/vm/runtime/vmStructs.cpp index 32e3921b2b5..c6cc4c4329f 100644 --- a/hotspot/src/share/vm/runtime/vmStructs.cpp +++ b/hotspot/src/share/vm/runtime/vmStructs.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. 
These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "classfile/dictionary.hpp" #include "classfile/javaClasses.hpp" @@ -122,6 +128,12 @@ #ifdef TARGET_ARCH_ppc # include "vmStructs_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vmStructs_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vmStructs_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_linux_x86 # include "vmStructs_linux_x86.hpp" #endif @@ -149,6 +161,12 @@ #ifdef TARGET_OS_ARCH_linux_ppc # include "vmStructs_linux_ppc.hpp" #endif +#ifdef TARGET_OS_ARCH_linux_mips +# include "vmStructs_linux_mips.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_loongarch +# include "vmStructs_linux_loongarch.hpp" +#endif #ifdef TARGET_OS_ARCH_aix_ppc # include "vmStructs_aix_ppc.hpp" #endif @@ -208,6 +226,10 @@ # include "adfiles/adGlobals_zero.hpp" #elif defined TARGET_ARCH_MODEL_ppc_64 # include "adfiles/adGlobals_ppc_64.hpp" +#elif defined TARGET_ARCH_MODEL_mips_64 +# include "adfiles/adGlobals_mips_64.hpp" +#elif defined TARGET_ARCH_MODEL_loongarch_64 +# include "adfiles/adGlobals_loongarch_64.hpp" #endif #endif // COMPILER2 diff --git a/hotspot/src/share/vm/runtime/vm_version.cpp b/hotspot/src/share/vm/runtime/vm_version.cpp index 91f9c70f5a3..d8dcfcfccad 100644 --- a/hotspot/src/share/vm/runtime/vm_version.cpp +++ b/hotspot/src/share/vm/runtime/vm_version.cpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #include "precompiled.hpp" #include "memory/universe.hpp" #include "oops/oop.inline.hpp" @@ -44,6 +50,12 @@ #ifdef TARGET_ARCH_ppc # include "vm_version_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "vm_version_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "vm_version_loongarch.hpp" +#endif const char* Abstract_VM_Version::_s_vm_release = Abstract_VM_Version::vm_release(); const char* Abstract_VM_Version::_s_internal_vm_info_string = Abstract_VM_Version::internal_vm_info_string(); @@ -193,6 +205,14 @@ const char* Abstract_VM_Version::jre_release_version() { #else #define CPU "ppc64" #endif +#elif defined(MIPS64) +#if defined(VM_LITTLE_ENDIAN) +#define CPU "mips64el" +#else +#define CPU "mips64" +#endif +#elif defined(LOONGARCH64) +#define CPU "loongarch64" #else #define CPU IA32_ONLY("x86") \ IA64_ONLY("ia64") \ diff --git a/hotspot/src/share/vm/utilities/copy.hpp b/hotspot/src/share/vm/utilities/copy.hpp index c1d82c70838..73b858b86e0 100644 --- a/hotspot/src/share/vm/utilities/copy.hpp +++ b/hotspot/src/share/vm/utilities/copy.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2023. These + * modifications are Copyright (c) 2015, 2023, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_UTILITIES_COPY_HPP #define SHARE_VM_UTILITIES_COPY_HPP @@ -350,6 +356,13 @@ class Copy : AllStatic { #ifdef TARGET_ARCH_ppc # include "copy_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "copy_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "copy_loongarch.hpp" +#endif + }; diff --git a/hotspot/src/share/vm/utilities/debug.cpp b/hotspot/src/share/vm/utilities/debug.cpp index 58a32a2b834..1026585f84c 100644 --- a/hotspot/src/share/vm/utilities/debug.cpp +++ b/hotspot/src/share/vm/utilities/debug.cpp @@ -690,6 +690,7 @@ void help() { tty->print_cr(" pns($sp, $ebp, $pc) on Linux/x86 or"); tty->print_cr(" pns($sp, $fp, $pc) on Linux/AArch64 or"); tty->print_cr(" pns($sp, 0, $pc) on Linux/ppc64 or"); + tty->print_cr(" pns($sp, $s8, $pc) on Linux/mips or"); tty->print_cr(" pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC"); tty->print_cr(" - in gdb do 'set overload-resolution off' before calling pns()"); tty->print_cr(" - in dbx do 'frame 1' before calling pns()"); diff --git a/hotspot/src/share/vm/utilities/globalDefinitions.hpp b/hotspot/src/share/vm/utilities/globalDefinitions.hpp index 81866b84099..61fc0c48a24 100644 --- a/hotspot/src/share/vm/utilities/globalDefinitions.hpp +++ b/hotspot/src/share/vm/utilities/globalDefinitions.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + #ifndef SHARE_VM_UTILITIES_GLOBALDEFINITIONS_HPP #define SHARE_VM_UTILITIES_GLOBALDEFINITIONS_HPP @@ -455,6 +461,12 @@ enum RTMState { #ifdef TARGET_ARCH_ppc # include "globalDefinitions_ppc.hpp" #endif +#ifdef TARGET_ARCH_mips +# include "globalDefinitions_mips.hpp" +#endif +#ifdef TARGET_ARCH_loongarch +# include "globalDefinitions_loongarch.hpp" +#endif /* * If a platform does not support native stack walking diff --git a/hotspot/src/share/vm/utilities/macros.hpp b/hotspot/src/share/vm/utilities/macros.hpp index 599e1074de5..41ef06e27fe 100644 --- a/hotspot/src/share/vm/utilities/macros.hpp +++ b/hotspot/src/share/vm/utilities/macros.hpp @@ -22,6 +22,12 @@ * */ +/* + * This file has been modified by Loongson Technology in 2020. These + * modifications are Copyright (c) 2015, 2020, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #ifndef SHARE_VM_UTILITIES_MACROS_HPP #define SHARE_VM_UTILITIES_MACROS_HPP @@ -373,6 +379,30 @@ #define NOT_SPARC(code) code #endif +#ifdef MIPS64 +#ifndef MIPS +#define MIPS +#endif +#define MIPS64_ONLY(code) code +#define NOT_MIPS64(code) +#else +#undef MIPS +#define MIPS64_ONLY(code) +#define NOT_MIPS64(code) code +#endif + +#ifdef LOONGARCH64 +#ifndef LOONGARCH +#define LOONGARCH +#endif +#define LOONGARCH64_ONLY(code) code +#define NOT_LOONGARCH64(code) +#else +#undef LOONGARCH +#define LOONGARCH64_ONLY(code) +#define NOT_LOONGARCH64(code) code +#endif + #if defined(PPC32) || defined(PPC64) #ifndef PPC #define PPC diff --git a/hotspot/src/share/vm/utilities/taskqueue.hpp b/hotspot/src/share/vm/utilities/taskqueue.hpp index bc06caccb46..46be35a3256 100644 --- a/hotspot/src/share/vm/utilities/taskqueue.hpp +++ b/hotspot/src/share/vm/utilities/taskqueue.hpp @@ -121,11 +121,22 @@ class TaskQueueSuper: public CHeapObj { Age(const Age& age) { _data = age._data; } Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; } +#if !defined MIPS && !defined LOONGARCH Age get() const volatile { return _data; } void set(Age age) volatile { _data = age._data; } idx_t top() const volatile { return _fields._top; } idx_t tag() const volatile { return _fields._tag; } +#else + Age get() const volatile { + size_t res = OrderAccess::load_ptr_acquire((volatile intptr_t*) &_data); + return *(Age*)(&res); + } + void set(Age age) volatile { OrderAccess::release_store_ptr((volatile intptr_t*) &_data, *(size_t*)(&age._data)); } + + idx_t top() const volatile { return OrderAccess::load_acquire((volatile idx_t*) &(_fields._top)); } + idx_t tag() const volatile { return OrderAccess::load_acquire((volatile idx_t*) &(_fields._tag)); } +#endif // Increment top; if it wraps, increment tag also. void increment() { @@ -195,23 +206,50 @@ class TaskQueueSuper: public CHeapObj { public: TaskQueueSuper() : _bottom(0), _age() {} +#if defined MIPS || defined LOONGARCH + inline uint get_bottom() const { + return OrderAccess::load_acquire((volatile juint*)&_bottom); + } + + inline void set_bottom(uint new_bottom) { + OrderAccess::release_store(&_bottom, new_bottom); + } +#endif // Return true if the TaskQueue contains/does not contain any tasks. - bool peek() const { return _bottom != _age.top(); } + bool peek() const { +#if defined MIPS || defined LOONGARCH + return get_bottom() != _age.top(); +#else + return _bottom != _age.top(); +#endif + } bool is_empty() const { return size() == 0; } // Return an estimate of the number of elements in the queue. // The "careful" version admits the possibility of pop_local/pop_global // races. 
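// The MIPS/LOONGARCH guards in this file route every access to _bottom and the
// Age word through OrderAccess acquire/release primitives, because both CPUs are
// more weakly ordered than the TSO-like targets the plain volatile accesses were
// written for. A standalone sketch of the same publish/consume pattern, using
// std::atomic purely for illustration (MiniQueue and its members are hypothetical
// and not part of this patch; a single producer is assumed):
//
//   #include <atomic>
//   #include <cstddef>
//
//   struct MiniQueue {
//     int slots[16];
//     std::atomic<std::size_t> bottom{0};
//
//     void push(int v) {                                        // producer thread only
//       std::size_t b = bottom.load(std::memory_order_relaxed);
//       slots[b % 16] = v;                                      // write the element first
//       bottom.store(b + 1, std::memory_order_release);         // then publish the new index
//     }
//
//     bool peek_last(int* out) {                                // any consumer thread
//       std::size_t b = bottom.load(std::memory_order_acquire); // pairs with the release store
//       if (b == 0) return false;
//       *out = slots[(b - 1) % 16];                             // element is guaranteed visible
//       return true;
//     }
//   };
//
//   int main() { MiniQueue q; q.push(42); int v; return q.peek_last(&v) && v == 42 ? 0 : 1; }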
uint size() const { +#if defined MIPS || defined LOONGARCH + return size(get_bottom(), _age.top()); +#else return size(_bottom, _age.top()); +#endif } uint dirty_size() const { +#if defined MIPS || defined LOONGARCH + return dirty_size(get_bottom(), _age.top()); +#else return dirty_size(_bottom, _age.top()); +#endif } void set_empty() { +#if defined MIPS || defined LOONGARCH + set_bottom(0); +#else _bottom = 0; +#endif _age.set(0); } @@ -263,7 +301,9 @@ class GenericTaskQueue: public TaskQueueSuper { typedef typename TaskQueueSuper::Age Age; typedef typename TaskQueueSuper::idx_t idx_t; +#if !defined MIPS && !defined LOONGARCH using TaskQueueSuper::_bottom; +#endif using TaskQueueSuper::_age; using TaskQueueSuper::increment_index; using TaskQueueSuper::decrement_index; @@ -327,7 +367,11 @@ template void GenericTaskQueue::oops_do(OopClosure* f) { // tty->print_cr("START OopTaskQueue::oops_do"); uint iters = size(); +#if defined MIPS || defined LOONGARCH + uint index = this->get_bottom(); +#else uint index = _bottom; +#endif for (uint i = 0; i < iters; ++i) { index = decrement_index(index); // tty->print_cr(" doing entry %d," INTPTR_T " -> " INTPTR_T, @@ -345,14 +389,22 @@ template bool GenericTaskQueue::push_slow(E t, uint dirty_n_elems) { if (dirty_n_elems == N - 1) { // Actually means 0, so do the push. +#if defined MIPS || defined LOONGARCH + uint localBot = this->get_bottom(); +#else uint localBot = _bottom; +#endif // g++ complains if the volatile result of the assignment is // unused, so we cast the volatile away. We cannot cast directly // to void, because gcc treats that as not using the result of the // assignment. However, casting to E& means that we trigger an // unused-value warning. So, we cast the E& to void. (void)const_cast(_elems[localBot] = t); +#if defined MIPS || defined LOONGARCH + this->set_bottom(increment_index(localBot)); +#else OrderAccess::release_store(&_bottom, increment_index(localBot)); +#endif TASKQUEUE_STATS_ONLY(stats.record_push()); return true; } @@ -407,7 +459,11 @@ bool GenericTaskQueue::pop_global(volatile E& t) { #if !(defined SPARC || defined IA32 || defined AMD64) OrderAccess::fence(); #endif +#if defined MIPS || defined LOONGARCH + uint localBot = this->get_bottom(); +#else uint localBot = OrderAccess::load_acquire((volatile juint*)&_bottom); +#endif uint n_elems = size(localBot, oldAge.top()); if (n_elems == 0) { return false; @@ -662,7 +718,11 @@ class ParallelTaskTerminator: public StackObj { template inline bool GenericTaskQueue::push(E t) { +#if defined MIPS || defined LOONGARCH + uint localBot = this->get_bottom(); +#else uint localBot = _bottom; +#endif assert(localBot < N, "_bottom out of range."); idx_t top = _age.top(); uint dirty_n_elems = dirty_size(localBot, top); @@ -674,7 +734,11 @@ GenericTaskQueue::push(E t) { // assignment. However, casting to E& means that we trigger an // unused-value warning. So, we cast the E& to void. (void) const_cast(_elems[localBot] = t); +#if defined MIPS || defined LOONGARCH + this->set_bottom(increment_index(localBot)); +#else OrderAccess::release_store(&_bottom, increment_index(localBot)); +#endif TASKQUEUE_STATS_ONLY(stats.record_push()); return true; } else { @@ -684,7 +748,11 @@ GenericTaskQueue::push(E t) { template inline bool GenericTaskQueue::pop_local(volatile E& t) { +#if defined MIPS || defined LOONGARCH + uint localBot = this->get_bottom(); +#else uint localBot = _bottom; +#endif // This value cannot be N-1. That can only occur as a result of // the assignment to bottom in this method. 
If it does, this method // resets the size to 0 before the next call (which is sequential, @@ -693,7 +761,11 @@ GenericTaskQueue::pop_local(volatile E& t) { assert(dirty_n_elems != N - 1, "Shouldn't be possible..."); if (dirty_n_elems == 0) return false; localBot = decrement_index(localBot); +#if defined MIPS || defined LOONGARCH + this->set_bottom(localBot); +#else _bottom = localBot; +#endif // This is necessary to prevent any read below from being reordered // before the store just above. OrderAccess::fence(); diff --git a/hotspot/src/share/vm/utilities/vmError.cpp b/hotspot/src/share/vm/utilities/vmError.cpp index fa7a32508e2..7098a98a9f7 100644 --- a/hotspot/src/share/vm/utilities/vmError.cpp +++ b/hotspot/src/share/vm/utilities/vmError.cpp @@ -22,6 +22,13 @@ * */ +/* + * This file has been modified by Loongson Technology in 2018. These + * modifications are Copyright (c) 2018 Loongson Technology, and are made + * available on the same license terms set forth above. + * +*/ + #include #include "precompiled.hpp" #include "compiler/compileBroker.hpp" @@ -488,7 +495,12 @@ void VMError::report(outputStream* st) { JDK_Version::runtime_name() : ""; const char* runtime_version = JDK_Version::runtime_version() != NULL ? JDK_Version::runtime_version() : ""; - st->print_cr("# JRE version: %s (%s) (build %s)", runtime_name, buf, runtime_version); +#ifdef LOONGSON_RUNTIME_NAME + const char* loongson_runtime_name_and_version = LOONGSON_RUNTIME_NAME; +#else + const char* loongson_runtime_name_and_version = ""; +#endif + st->print_cr("# JRE version: %s (%s) (build %s) (%s)", runtime_name, buf, runtime_version, loongson_runtime_name_and_version); st->print_cr("# Java VM: %s (%s %s %s %s)", Abstract_VM_Version::vm_name(), Abstract_VM_Version::vm_release(), diff --git a/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh b/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh index fcf1d04b6aa..5b8e7dcce58 100644 --- a/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh +++ b/hotspot/test/compiler/criticalnatives/argumentcorruption/Test8167409.sh @@ -24,6 +24,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2023. These +# modifications are Copyright (c) 2023, Loongson Technology, and are made +# available on the same license terms set forth above. +# + ## @test Test8167409.sh ## @bug 8167409 ## @summary Invalid value passed to critical JNI function @@ -68,6 +74,18 @@ if [ $VM_CPU = "aarch64" ]; then exit 0; fi +# CriticalJNINatives is not supported for loongarch64 +if [ $VM_CPU = "loongarch64" ]; then + echo "Test Passed" + exit 0; +fi + +# CriticalJNINatives is not supported for mips64 +if [ $VM_CPU = "mips64" -o $VM_CPU = "mips64el" ]; then + echo "Test Passed" + exit 0; +fi + THIS_DIR=. 
cp ${TESTSRC}${FS}*.java ${THIS_DIR} diff --git a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java index fa9a6f208b3..885957cf1c2 100644 --- a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java +++ b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForOtherCPU.java @@ -34,11 +34,12 @@ public class GenericTestCaseForOtherCPU extends SHAOptionsBase.TestCase { public GenericTestCaseForOtherCPU(String optionName) { - // Execute the test case on any CPU except SPARC and X86 + // Execute the test case on any CPU except SPARC, LoongArch64 and X86 super(optionName, new NotPredicate(new OrPredicate(Platform::isSparc, new OrPredicate(Platform::isAArch64, new OrPredicate(Platform::isPPC, - new OrPredicate(Platform::isX64, Platform::isX86)))))); + new OrPredicate(Platform::isLoongArch64, + new OrPredicate(Platform::isX64, Platform::isX86))))))); } @Override diff --git a/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java b/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java index dc8c3984081..2427b2bf7b9 100644 --- a/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java +++ b/hotspot/test/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java @@ -62,18 +62,24 @@ public class IntrinsicPredicates { = new OrPredicate( new CPUSpecificPredicate("sparc.*", new String[] { "sha1" }, null), + // Basic instructions are used to implement SHA1 Intrinsics on LA, so "sha1" feature is not needed. + new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, + null), new CPUSpecificPredicate("aarch64", new String[] { "sha1" }, - null)); + null))); public static final BooleanSupplier SHA256_INSTRUCTION_AVAILABLE = new OrPredicate(new CPUSpecificPredicate("aarch64", new String[] { "sha256" }, null), new OrPredicate(new CPUSpecificPredicate("sparc.*", new String[] { "sha256" }, null), + // Basic instructions are used to implement SHA256 Intrinsics on LA, so "sha256" feature is not needed. 
+ new OrPredicate(new CPUSpecificPredicate("loongarch64.*", null, + null), new OrPredicate(new CPUSpecificPredicate("ppc64.*", new String[] { "sha" }, null), new CPUSpecificPredicate("ppc64le.*", new String[] { "sha" }, - null)))); + null))))); public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE = new OrPredicate( diff --git a/hotspot/test/runtime/6929067/Test6929067.sh b/hotspot/test/runtime/6929067/Test6929067.sh index 2bbb3401ce5..1a5482e6451 100644 --- a/hotspot/test/runtime/6929067/Test6929067.sh +++ b/hotspot/test/runtime/6929067/Test6929067.sh @@ -97,6 +97,10 @@ case "$ARCH" in i686) ARCH=i386 ;; + loongarch64) + COMP_FLAG="" + ARCH=loongarch64 + ;; # Assuming other ARCH values need no translation esac diff --git a/hotspot/test/runtime/Unsafe/RangeCheck.java b/hotspot/test/runtime/Unsafe/RangeCheck.java index 9ded944cb25..4d4ea2e048a 100644 --- a/hotspot/test/runtime/Unsafe/RangeCheck.java +++ b/hotspot/test/runtime/Unsafe/RangeCheck.java @@ -43,6 +43,7 @@ public static void main(String args[]) throws Exception { true, "-Xmx32m", "-XX:-TransmitErrorReport", + "-XX:-InlineUnsafeOps", // The compiler intrinsics doesn't have the assert DummyClassWithMainRangeCheck.class.getName()); OutputAnalyzer output = new OutputAnalyzer(pb.start()); diff --git a/hotspot/test/test_env.sh b/hotspot/test/test_env.sh index 5ba4f28c455..d9d8bb6b6b6 100644 --- a/hotspot/test/test_env.sh +++ b/hotspot/test/test_env.sh @@ -211,6 +211,29 @@ if [ $? = 0 ] then VM_CPU="aarch64" fi +grep "mips" vm_version.out > ${NULL} +if [ $? = 0 ] +then + VM_CPU="mips" + if [ $VM_BITS = "64" ] + then + VM_CPU="mips64" + grep "mips64el" vm_version.out > ${NULL} + if [ $? = 0 ] + then + VM_CPU="mips64el" + fi + fi +fi +grep "loongarch" vm_version.out > ${NULL} +if [ $? = 0 ] +then + VM_CPU="loongarch" + if [ $VM_BITS = "64" ] + then + VM_CPU="loongarch64" + fi +fi export VM_TYPE VM_BITS VM_OS VM_CPU echo "VM_TYPE=${VM_TYPE}" echo "VM_BITS=${VM_BITS}" diff --git a/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java b/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java index 6a14079347f..56a6375b5f4 100644 --- a/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java +++ b/hotspot/test/testlibrary/com/oracle/java/testlibrary/Platform.java @@ -126,6 +126,10 @@ public static boolean isAArch64() { return isArch("aarch64"); } + public static boolean isLoongArch64() { + return isArch("loongarch64"); + } + private static boolean isArch(String archnameRE) { return Pattern.compile(archnameRE, Pattern.CASE_INSENSITIVE) .matcher(osArch) @@ -136,6 +140,10 @@ public static String getOsArch() { return osArch; } + public static boolean isMIPS() { + return isArch("mips.*"); + } + /** * Return a boolean for whether we expect to be able to attach * the SA to our own processes on this system. 
diff --git a/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java index 7d56a4a3bc1..41825e18b35 100644 --- a/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/hotspot/test/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -43,7 +43,7 @@ */ public class TestMutuallyExclusivePlatformPredicates { private static enum MethodGroup { - ARCH("isARM", "isPPC", "isSparc", "isX86", "isX64", "isAArch64"), + ARCH("isARM", "isPPC", "isSparc", "isX86", "isX64", "isAArch64", "isMIPS", "isLoongArch64"), BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isSolaris", "isWindows", "isOSX"), VM_TYPE("isClient", "isServer", "isGraal", "isMinimal"), diff --git a/jdk/make/Images.gmk b/jdk/make/Images.gmk index 991c0af7b4e..91716856553 100644 --- a/jdk/make/Images.gmk +++ b/jdk/make/Images.gmk @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2022. These +# modifications are Copyright (c) 2018, 2022, Loongson Technology, and are made +# available on the same license terms set forth above. +# + include $(SPEC) include MakeBase.gmk include JavaCompilation.gmk @@ -650,6 +656,11 @@ $(JDK_IMAGE_DIR)/src.zip: $(IMAGES_OUTPUTDIR)/src.zip $(ECHO) $(LOG_INFO) Copying $(patsubst $(OUTPUT_ROOT)/%,%,$@) $(install-file) +# create link "mips64 -> mips64el" for deploy +$(JDK_IMAGE_DIR)/jre/lib/mips64: $(JDK_IMAGE_DIR)/jre/lib/mips64el + $(ECHO) $(LOG_INFO) Create link from mips64 to mips64el + $(CD) $(JDK_IMAGE_DIR)/jre/lib && $(RM) mips64 && $(LN) -s mips64el mips64 + ################################################################################ # Post processing (strip etc) @@ -728,6 +739,14 @@ ifneq ($(POST_STRIP_CMD), ) endif +################################################################################ +# Loongson added list, architecture dependent files +ifeq ($(OPENJDK_TARGET_CPU), mips64) + ifeq ($(OPENJDK_TARGET_CPU_ENDIAN), little) + JDK_IMAGE_LOONGSON_LIST := $(JDK_IMAGE_DIR)/jre/lib/mips64el + endif +endif + ################################################################################ # Include the custom makefile right here, after all variables have been defined @@ -753,6 +772,7 @@ jdk-image: $(JDK_BIN_TARGETS) $(JDKJRE_BIN_TARGETS) \ $(JDKJRE_DOC_TARGETS) $(JDK_DOC_TARGETS) \ $(JDK_INFO_FILE) $(JDKJRE_STRIP_LIST) $(JDK_BIN_STRIP_LIST) \ $(JDK_IMAGE_DIR)/src.zip \ + $(JDK_IMAGE_LOONGSON_LIST) \ $(JDK_BIN_ISADIR_LINK_TARGETS) $(JDKJRE_BIN_ISADIR_LINK_TARGETS) jre-overlay-image: $(JRE_OVERLAY_BIN_TARGETS) $(JRE_OVERLAY_LIB_TARGETS) \ diff --git a/jdk/make/gensrc/GensrcMisc.gmk b/jdk/make/gensrc/GensrcMisc.gmk index 78ec501956a..0804888f3eb 100644 --- a/jdk/make/gensrc/GensrcMisc.gmk +++ b/jdk/make/gensrc/GensrcMisc.gmk @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2018. These +# modifications are Copyright (c) 2018, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + include ProfileNames.gmk ################################################################################ @@ -39,6 +45,7 @@ $(PROFILE_VERSION_JAVA_TARGETS): \ $(SED) -e 's/@@launcher_name@@/$(LAUNCHER_NAME)/g' \ -e 's/@@java_version@@/$(RELEASE)/g' \ -e 's/@@java_runtime_version@@/$(FULL_VERSION)/g' \ + -e 's/@@loongson_runtime_name@@/$(LOONGSON_RUNTIME_NAME)/g' \ -e 's/@@java_runtime_name@@/$(RUNTIME_NAME)/g' \ -e 's/@@java_profile_name@@/$(call profile_version_name, $@)/g' \ -e 's/@@java_distro_name@@/$(DISTRO_NAME)/g' \ 
diff --git a/jdk/make/lib/SoundLibraries.gmk b/jdk/make/lib/SoundLibraries.gmk index b59a9462ec5..8ce97dc8544 100644 --- a/jdk/make/lib/SoundLibraries.gmk +++ b/jdk/make/lib/SoundLibraries.gmk @@ -23,6 +23,12 @@ # questions. # +# +# This file has been modified by Loongson Technology in 2021. These +# modifications are Copyright (c) 2015, 2021, Loongson Technology, and are made +# available on the same license terms set forth above. 
+# + LIBJSOUND_SRC_DIRS := \ $(JDK_TOPDIR)/src/share/native/com/sun/media/sound \ $(JDK_TOPDIR)/src/$(OPENJDK_TARGET_OS_API_DIR)/native/com/sun/media/sound @@ -136,6 +142,14 @@ else LIBJSOUND_CFLAGS += -DX_ARCH=X_PPC endif + ifeq ($(OPENJDK_TARGET_CPU), mips64) + LIBJSOUND_CFLAGS += -DX_ARCH=X_MIPS64 + endif + + ifeq ($(OPENJDK_TARGET_CPU), loongarch64) + LIBJSOUND_CFLAGS += -DX_ARCH=X_LOONGARCH64 + endif + ifeq ($(OPENJDK_TARGET_CPU), ppc64) LIBJSOUND_CFLAGS += -DX_ARCH=X_PPC64 endif diff --git a/jdk/src/share/classes/sun/misc/Version.java.template b/jdk/src/share/classes/sun/misc/Version.java.template index 9c65c022bc1..dd660bb7560 100644 --- a/jdk/src/share/classes/sun/misc/Version.java.template +++ b/jdk/src/share/classes/sun/misc/Version.java.template @@ -23,6 +23,13 @@ * questions. */ +/* + * This file has been modified by Loongson Technology in 2018. These + * modifications are Copyright (c) 2018 Loongson Technology, and are made + * available on the same license terms set forth above. + */ + + package sun.misc; import java.io.PrintStream; @@ -50,6 +57,9 @@ public class Version { private static final String java_distro_version = "@@java_distro_version@@"; + private static final String loongson_runtime_name = + "@@loongson_runtime_name@@"; + static { init(); } 
diff --git a/jdk/src/solaris/bin/loongarch64/jvm.cfg b/jdk/src/solaris/bin/loongarch64/jvm.cfg new file mode 100644 index 00000000000..42a06755da8 --- /dev/null +++ b/jdk/src/solaris/bin/loongarch64/jvm.cfg @@ -0,0 +1,36 @@ +# Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. 
+# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# +# List of JVMs that can be used as an option to java, javac, etc. +# Order is important -- first in this list is the default JVM. +# NOTE that this both this file and its format are UNSUPPORTED and +# WILL GO AWAY in a future release. +# +# You may also select a JVM in an arbitrary location with the +# "-XXaltjvm=" option, but that too is unsupported +# and may not be available in a future release. +# +-server KNOWN +-client IGNORE diff --git a/jdk/src/solaris/bin/mips64/jvm.cfg b/jdk/src/solaris/bin/mips64/jvm.cfg new file mode 100644 index 00000000000..42a06755da8 --- /dev/null +++ b/jdk/src/solaris/bin/mips64/jvm.cfg @@ -0,0 +1,36 @@ +# Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License version 2 only, as +# published by the Free Software Foundation. Oracle designates this +# particular file as subject to the "Classpath" exception as provided +# by Oracle in the LICENSE file that accompanied this code. +# +# This code is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +# version 2 for more details (a copy is included in the LICENSE file that +# accompanied this code). +# +# You should have received a copy of the GNU General Public License version +# 2 along with this work; if not, write to the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +# or visit www.oracle.com if you need additional information or have any +# questions. +# +# +# List of JVMs that can be used as an option to java, javac, etc. +# Order is important -- first in this list is the default JVM. +# NOTE that this both this file and its format are UNSUPPORTED and +# WILL GO AWAY in a future release. +# +# You may also select a JVM in an arbitrary location with the +# "-XXaltjvm=" option, but that too is unsupported +# and may not be available in a future release. 
+# +-server KNOWN +-client IGNORE diff --git a/jdk/test/jdk/jfr/event/os/TestCPUInformation.java b/jdk/test/jdk/jfr/event/os/TestCPUInformation.java index 17c8419cbc0..a8b76cb71a8 100644 --- a/jdk/test/jdk/jfr/event/os/TestCPUInformation.java +++ b/jdk/test/jdk/jfr/event/os/TestCPUInformation.java @@ -54,8 +54,8 @@ public static void main(String[] args) throws Throwable { Events.assertField(event, "hwThreads").atLeast(1); Events.assertField(event, "cores").atLeast(1); Events.assertField(event, "sockets").atLeast(1); - Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); - Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); + Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); + Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "MIPS", "LoongArch"); } } } diff --git a/jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher b/jdk/test/sun/management/jmxremote/bootstrap/linux-loongarch64/launcher new file mode 100755 index 00000000000..e69de29bb2d diff --git a/jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher b/jdk/test/sun/management/jmxremote/bootstrap/linux-mips64el/launcher new file mode 100644 index 00000000000..e69de29bb2d diff --git a/jdk/test/sun/security/pkcs11/PKCS11Test.java b/jdk/test/sun/security/pkcs11/PKCS11Test.java index 70c4b1e4174..e3882bf5798 100644 --- a/jdk/test/sun/security/pkcs11/PKCS11Test.java +++ b/jdk/test/sun/security/pkcs11/PKCS11Test.java @@ -21,6 +21,11 @@ * questions. */ + /* + * This file has been modified by Loongson Technology in 2022. These + * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ // common infrastructure for SunPKCS11 tests @@ -596,6 +601,9 @@ boolean checkSupport(List supportedEC, "/usr/lib64/"}); osMap.put("Linux-ppc64-64", new String[]{"/usr/lib64/"}); osMap.put("Linux-ppc64le-64", new String[]{"/usr/lib64/"}); + osMap.put("Linux-mips64el-64", new String[]{"/usr/lib64/"}); + osMap.put("Linux-loongarch64-64", new String[]{"/usr/lib/loongarch64-linux-gnu/", + "/usr/lib64/" }); osMap.put("Windows-x86-32", new String[]{ PKCS11_BASE + "/nss/lib/windows-i586/".replace('/', SEP)}); osMap.put("Windows-amd64-64", new String[]{ 