Skip to content

Commit

Permalink
8308363: Initial compiler support for FP16 scalar operations.
Browse files Browse the repository at this point in the history
Reviewed-by: sviswanathan
  • Loading branch information
Jatin Bhateja committed Sep 19, 2023
1 parent 4dca15b commit f03fb4e
Show file tree
Hide file tree
Showing 33 changed files with 880 additions and 13 deletions.
2 changes: 1 addition & 1 deletion make/common/JavaCompilation.gmk
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ define SetupJavaCompilationBody
PARANOIA_FLAGS := -implicit:none -Xprefer:source -XDignore.symbol.file=true -encoding ascii

$1_FLAGS += -g -Xlint:all $$($1_TARGET_RELEASE) $$(PARANOIA_FLAGS) $$(JAVA_WARNINGS_ARE_ERRORS)
$1_FLAGS += $$($1_JAVAC_FLAGS)
$1_FLAGS += $$($1_JAVAC_FLAGS) -XDenablePrimitiveClasses

ifneq ($$($1_DISABLED_WARNINGS), )
$1_FLAGS += -Xlint:$$(call CommaList, $$(addprefix -, $$($1_DISABLED_WARNINGS)))
Expand Down
34 changes: 33 additions & 1 deletion src/hotspot/cpu/x86/assembler_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3144,6 +3144,22 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
emit_int16(0x6F, (0xC0 | encode));
}

// Emits the register-to-register form of VMOVW that writes an XMM register from a
// general-purpose register: EVEX-encoded, 128-bit, 66 prefix, opcode map 5, opcode 0x6E.
// Requires AVX512-FP16 (asserted).
void Assembler::vmovw(XMMRegister dst, Register src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  // dst (XMM) is passed as the first register operand, src (GPR) as the last;
  // there is no second source, so the nds slot is 0.
  int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP5, &attrs);
  // 0xC0 sets ModRM.mod = 11 (register-direct operand).
  int modrm = 0xC0 | reg_bits;
  emit_int16(0x6E, modrm);
}

// Emits the register-to-register form of VMOVW that writes a general-purpose register
// from an XMM register: EVEX-encoded, 128-bit, 66 prefix, opcode map 5, opcode 0x7E.
// Requires AVX512-FP16 (asserted).
void Assembler::vmovw(Register dst, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  // Note the operand order: the XMM source is passed as the first register operand
  // and the GPR destination as the last. The nds slot is unused (0).
  int reg_bits = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_MAP5, &attrs);
  // 0xC0 sets ModRM.mod = 11 (register-direct operand).
  int modrm = 0xC0 | reg_bits;
  emit_int16(0x7E, modrm);
}

void Assembler::vmovdqu(XMMRegister dst, Address src) {
assert(UseAVX > 0, "");
InstructionMark im(this);
Expand Down Expand Up @@ -7311,6 +7327,22 @@ void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector
emit_operand(dst, src, 0);
}

// Emits the three-operand register form of the packed FP16 add (evaddph):
// EVEX-encoded, no SIMD prefix, opcode map 5, opcode 0x58.
//   dst        - destination vector register
//   nds        - first source vector register
//   src        - second source vector register
//   vector_len - vector length encoding forwarded to InstructionAttr
// Requires AVX512-FP16 (asserted).
void Assembler::evaddph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_MAP5, &attrs);
  // 0xC0 sets ModRM.mod = 11 (register-direct operand).
  int modrm = 0xC0 | reg_bits;
  emit_int16(0x58, modrm);
}

// Emits the three-operand register form of the scalar FP16 add (evaddsh):
// EVEX-encoded, F3 prefix, opcode map 5, opcode 0x58. It shares its opcode byte with
// evaddph and differs only in the SIMD prefix and the fixed 128-bit / non-VL attributes.
//   dst - destination register
//   nds - first source register
//   src - second source register
// Requires AVX512-FP16 (asserted).
void Assembler::evaddsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
  assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
  InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
  attrs.set_is_evex_instruction();
  int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_F3, VEX_OPCODE_MAP5, &attrs);
  // 0xC0 sets ModRM.mod = 11 (register-direct operand).
  int modrm = 0xC0 | reg_bits;
  emit_int16(0x58, modrm);
}

void Assembler::psubb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
Expand Down Expand Up @@ -11480,7 +11512,7 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
byte2 = (~byte2) & 0xF0;
// confine opc opcode extensions in mm bits to lower three bits
// of form {0F, 0F_38, 0F_3A}
// of form {0F, 0F_38, 0F_3A, MAP5}
byte2 |= opc;

// P1: byte 3 as Wvvvv1pp
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/cpu/x86/assembler_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,7 @@ class Assembler : public AbstractAssembler {
VEX_OPCODE_0F = 0x1,
VEX_OPCODE_0F_38 = 0x2,
VEX_OPCODE_0F_3A = 0x3,
VEX_OPCODE_MAP5 = 0x5,
VEX_OPCODE_MASK = 0x1F
};

Expand Down Expand Up @@ -1649,6 +1650,9 @@ class Assembler : public AbstractAssembler {
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);

void vmovw(XMMRegister dst, Register src);
void vmovw(Register dst, XMMRegister src);

#ifdef _LP64
void movsbq(Register dst, Address src);
void movsbq(Register dst, Register src);
Expand Down Expand Up @@ -2394,6 +2398,8 @@ class Assembler : public AbstractAssembler {
void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void evaddsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
void evaddph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Leaf level assembler routines for masked operations.
void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
Expand Down
5 changes: 5 additions & 0 deletions src/hotspot/cpu/x86/vm_version_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,7 @@ void VM_Version::get_processor_features() {
_features &= ~CPU_AVX512_VBMI2;
_features &= ~CPU_AVX512_BITALG;
_features &= ~CPU_AVX512_IFMA;
_features &= ~CPU_AVX512_FP16;
}

if (UseAVX < 2)
Expand Down Expand Up @@ -982,6 +983,7 @@ void VM_Version::get_processor_features() {
_features &= ~CPU_GFNI;
_features &= ~CPU_AVX512_BITALG;
_features &= ~CPU_AVX512_IFMA;
_features &= ~CPU_AVX512_FP16;
}
}

Expand Down Expand Up @@ -3017,6 +3019,9 @@ uint64_t VM_Version::feature_flags() {
}
if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
result |= CPU_SERIALIZE;

if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
result |= CPU_AVX512_FP16;
}

// ZX features.
Expand Down
8 changes: 6 additions & 2 deletions src/hotspot/cpu/x86/vm_version_x86.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,9 @@ class VM_Version : public Abstract_VM_Version {
serialize : 1,
: 5,
cet_ibt : 1,
: 11;
: 2,
avx512_fp16 : 1,
: 8;
} bits;
};

Expand Down Expand Up @@ -390,7 +392,8 @@ class VM_Version : public Abstract_VM_Version {
decl(OSPKE, "ospke", 55) /* OS enables protection keys */ \
decl(CET_IBT, "cet_ibt", 56) /* Control Flow Enforcement - Indirect Branch Tracking */ \
decl(CET_SS, "cet_ss", 57) /* Control Flow Enforcement - Shadow Stack */ \
decl(AVX512_IFMA, "avx512_ifma", 58) /* Integer Vector FMA instructions*/
decl(AVX512_IFMA, "avx512_ifma", 58) /* Integer Vector FMA instructions*/ \
decl(AVX512_FP16, "avx512_fp16", 59) /* AVX512 FP16 ISA support*/

#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
Expand Down Expand Up @@ -696,6 +699,7 @@ class VM_Version : public Abstract_VM_Version {
static bool supports_avx512_bitalg() { return (_features & CPU_AVX512_BITALG) != 0; }
static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; }
static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
static bool supports_avx512_fp16() { return (_features & CPU_AVX512_FP16) != 0; }
static bool supports_hv() { return (_features & CPU_HV) != 0; }
static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; }
static bool supports_f16c() { return (_features & CPU_F16C) != 0; }
Expand Down
60 changes: 60 additions & 0 deletions src/hotspot/cpu/x86/x86.ad
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,13 @@ bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_AddHF:
case Op_ReinterpretS2HF:
case Op_ReinterpretHF2S:
if (!VM_Version::supports_avx512_fp16()) {
return false;
}
break;
case Op_VectorLoadShuffle:
case Op_VectorRearrange:
case Op_MulReductionVI:
Expand Down Expand Up @@ -1722,6 +1729,11 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
// * 128bit vroundpd instruction is present only in AVX1
int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
switch (opcode) {
case Op_AddVHF:
if (!VM_Version::supports_avx512_fp16()) {
return false;
}
break;
case Op_AbsVF:
case Op_NegVF:
if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
Expand Down Expand Up @@ -10149,4 +10161,52 @@ instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlag
ins_pipe(pipe_slow);
%}

// Matches ReinterpretS2HF: transfers $src (integer register, rRegI) into $dst
// (float register, regF) with a single vmovw.
instruct reinterpretS2H (regF dst, rRegI src)
%{
match(Set dst (ReinterpretS2HF src));
format %{ "vmovw $dst, $src" %}
ins_encode %{
// GPR -> XMM overload of Assembler::vmovw.
__ vmovw($dst$$XMMRegister, $src$$Register);
%}
ins_pipe(pipe_slow);
%}

// Matches the fused pattern ReinterpretS2HF(ConvF2HF src): both source and result
// are float registers (regF), so the whole subtree is covered by one vcvtps2ph
// with no move through an integer register.
instruct convF2HFAndS2HF (regF dst, regF src)
%{
match(Set dst (ReinterpretS2HF (ConvF2HF src)));
format %{ "convF2HFAndS2HF $dst, $src" %}
ins_encode %{
// NOTE(review): imm8 0x04 presumably selects the MXCSR rounding mode for
// vcvtps2ph (per the Intel SDM) -- confirm.
__ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
%}
ins_pipe(pipe_slow);
%}

// Matches ReinterpretHF2S: transfers $src (float register, regF) into $dst
// (integer register, rRegI) with a single vmovw.
instruct reinterpretH2S (rRegI dst, regF src)
%{
match(Set dst (ReinterpretHF2S src));
format %{ "vmovw $dst, $src" %}
ins_encode %{
// XMM -> GPR overload of Assembler::vmovw.
__ vmovw($dst$$Register, $src$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}
// Matches AddHF: scalar half-float add of $src1 and $src2 into $dst, all in float
// registers (regF), emitted as a single three-operand evaddsh.
instruct addFP16_scalar (regF dst, regF src1, regF src2)
%{
match(Set dst (AddHF src1 src2));
format %{ "vaddsh $dst, $src1, $src2" %}
ins_encode %{
__ evaddsh($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
%}
ins_pipe(pipe_slow);
%}

// Matches AddVHF: vector half-float add of $src1 and $src2 into $dst (all vec
// operands), emitted as a single three-operand evaddph.
instruct vaddVHF (vec dst, vec src1, vec src2)
%{
match(Set dst (AddVHF src1 src2));
format %{ "vaddph $dst, $src1, $src2" %}
ins_encode %{
// Vector length encoding for this node, forwarded as evaddph's vector_len.
int vlen_enc = vector_length_encoding(this);
__ evaddph($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe(pipe_slow);
%}
2 changes: 1 addition & 1 deletion src/hotspot/share/adlc/formssel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4201,7 +4201,7 @@ Form::DataType MatchRule::is_ideal_load() const {

bool MatchRule::is_vector() const {
static const char *vector_list[] = {
"AddVB","AddVS","AddVI","AddVL","AddVF","AddVD",
"AddVB","AddVHF", "AddVS","AddVI","AddVL","AddVF","AddVD",
"SubVB","SubVS","SubVI","SubVL","SubVF","SubVD",
"MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
"DivVF","DivVD",
Expand Down
20 changes: 18 additions & 2 deletions src/hotspot/share/classfile/classFileParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4695,6 +4695,22 @@ static void check_illegal_static_method(const InstanceKlass* this_klass, TRAPS)
}
}

// Utility function that recognizes JDK-internal primitive classes (currently only
// java.lang.Float16), which are accepted without the explicit JVM flag
// EnablePrimitiveClasses.
// Returns true when class_name is such a class, false otherwise.
// Side effect: on a match the global EnablePrimitiveClasses flag is set to true,
// despite this being a const member function that reads like a pure predicate.
bool ClassFileParser::is_jdk_internal_class(const Symbol* class_name) const {
  if (class_name != vmSymbols::java_lang_Float16()) {
    return false;
  }
  EnablePrimitiveClasses = true;
  return true;
}

// Returns true when the text starting at sig contains the Q-descriptor of
// java.lang.Float16 ("Qjava/lang/Float16;") anywhere within it. The check is a plain
// substring search, so it is not anchored to the start of sig.
// NOTE(review): as_C_string() presumably allocates from the resource area -- confirm
// that callers have a ResourceMark in scope.
bool ClassFileParser::is_jdk_internal_class_sig(const char* sig) const {
  const char* float16_descriptor = vmSymbols::java_lang_Float16_signature()->as_C_string();
  return strstr(sig, float16_descriptor) != nullptr;
}

// utility methods for format checking

void ClassFileParser::verify_legal_class_modifiers(jint flags, const char* name, bool is_Object, TRAPS) const {
Expand Down Expand Up @@ -4725,7 +4741,7 @@ void ClassFileParser::verify_legal_class_modifiers(jint flags, const char* name,
return;
}

if (is_primitive_class && !EnablePrimitiveClasses) {
if (is_primitive_class && !is_jdk_internal_class(_class_name) && !EnablePrimitiveClasses) {
ResourceMark rm(THREAD);
Exceptions::fthrow(
THREAD_AND_LOCATION,
Expand Down Expand Up @@ -5157,7 +5173,7 @@ const char* ClassFileParser::skip_over_field_signature(const char* signature,
case JVM_SIGNATURE_PRIMITIVE_OBJECT:
// Can't enable this check fully until JDK upgrades the bytecode generators (TODO: JDK-8270852).
// For now, compare to class file version 51 so old verifier doesn't see Q signatures.
if ( (_major_version < 51 /* CONSTANT_CLASS_DESCRIPTORS */ ) || (!EnablePrimitiveClasses)) {
if ( (_major_version < 51 /* CONSTANT_CLASS_DESCRIPTORS */ ) || (!EnablePrimitiveClasses && !is_jdk_internal_class_sig(signature))) {
classfile_parse_error("Class name contains illegal Q-signature "
"in descriptor in class file %s, requires option -XX:+EnablePrimitiveClasses",
CHECK_0);
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/classfile/classFileParser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,10 @@ class ClassFileParser {
bool _has_vanilla_constructor;
int _max_bootstrap_specifier_index; // detects BSS values

bool is_jdk_internal_class(const Symbol* class_name) const;

bool is_jdk_internal_class_sig(const char* sig) const;

void parse_stream(const ClassFileStream* const stream, TRAPS);

void mangle_hidden_class_name(InstanceKlass* const ik);
Expand Down
1 change: 1 addition & 0 deletions src/hotspot/share/classfile/vmClassMacros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@
do_klass(Boolean_klass, java_lang_Boolean ) \
do_klass(Character_klass, java_lang_Character ) \
do_klass(Float_klass, java_lang_Float ) \
do_klass(Float16_klass, java_lang_Float16 ) \
do_klass(Double_klass, java_lang_Double ) \
do_klass(Byte_klass, java_lang_Byte ) \
do_klass(Short_klass, java_lang_Short ) \
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/share/classfile/vmIntrinsics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,12 @@ class methodHandle;
do_intrinsic(_dsignum, java_lang_Math, signum_name, double_double_signature, F_S) \
do_intrinsic(_fsignum, java_lang_Math, signum_name, float_float_signature, F_S) \
\
\
/* Float16 intrinsics, similar to what we have in Math. */ \
do_intrinsic(_sum_float16, java_lang_Float16, sum_name, floa16_float16_signature, F_S) \
do_name(sum_name, "sum") \
do_signature(floa16_float16_signature, "(Qjava/lang/Float16;Qjava/lang/Float16;)Qjava/lang/Float16;") \
\
/* StrictMath intrinsics, similar to what we have in Math. */ \
do_intrinsic(_min_strict, java_lang_StrictMath, min_name, int2_int_signature, F_S) \
do_intrinsic(_max_strict, java_lang_StrictMath, max_name, int2_int_signature, F_S) \
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/share/classfile/vmSymbols.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ class SerializeClosure;
template(java_lang_Character_CharacterCache, "java/lang/Character$CharacterCache") \
template(java_lang_CharacterDataLatin1, "java/lang/CharacterDataLatin1") \
template(java_lang_Float, "java/lang/Float") \
template(java_lang_Float16, "java/lang/Float16") \
template(java_lang_Float16_signature, "Qjava/lang/Float16;") \
template(java_lang_Double, "java/lang/Double") \
template(java_lang_Byte, "java/lang/Byte") \
template(java_lang_Byte_ByteCache, "java/lang/Byte$ByteCache") \
Expand Down
3 changes: 3 additions & 0 deletions src/hotspot/share/oops/inlineKlass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,9 @@ void InlineKlass::restore_unshareable_info(ClassLoaderData* loader_data, Handle
if (value_array_klasses() != nullptr) {
value_array_klasses()->restore_unshareable_info(ClassLoaderData::the_null_class_loader_data(), Handle(), CHECK);
}
if (vmSymbols::java_lang_Float16() == name()) {
EnablePrimitiveClasses = true;
}
}

// oop verify
Expand Down
10 changes: 9 additions & 1 deletion src/hotspot/share/opto/addnode.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ class AddLNode : public AddNode {
};

//------------------------------AddFNode---------------------------------------
// Add 2 floats
// Add 2 single-precision floats
class AddFNode : public AddNode {
public:
AddFNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
Expand All @@ -131,6 +131,14 @@ class AddFNode : public AddNode {
virtual uint ideal_reg() const { return Op_RegF; }
};

//------------------------------AddHFNode---------------------------------------
// Add 2 half-precision floats
// Derives from AddFNode and declares only a forwarding constructor and its own
// Opcode(); all other behavior is inherited from AddFNode.
class AddHFNode : public AddFNode {
public:
AddHFNode( Node *in1, Node *in2 ) : AddFNode(in1,in2) {}
virtual int Opcode() const;
};

//------------------------------AddDNode---------------------------------------
// Add 2 doubles
class AddDNode : public AddNode {
Expand Down
4 changes: 3 additions & 1 deletion src/hotspot/share/opto/c2compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,9 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method) {
case vmIntrinsics::_Preconditions_checkLongIndex:
case vmIntrinsics::_getObjectSize:
break;

case vmIntrinsics::_sum_float16:
if (!Matcher::match_rule_supported(Op_AddHF)) return false;
break;
case vmIntrinsics::_VectorCompressExpand:
case vmIntrinsics::_VectorUnaryOp:
case vmIntrinsics::_VectorBinaryOp:
Expand Down
4 changes: 4 additions & 0 deletions src/hotspot/share/opto/classes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ macro(AddF)
macro(AddI)
macro(AddL)
macro(AddP)
macro(AddHF)
macro(Allocate)
macro(AllocateArray)
macro(AndI)
Expand Down Expand Up @@ -373,6 +374,7 @@ macro(XorL)
macro(InlineType)
macro(Vector)
macro(AddVB)
macro(AddVHF)
macro(AddVS)
macro(AddVI)
macro(AddReductionVI)
Expand Down Expand Up @@ -486,6 +488,8 @@ macro(ExtractF)
macro(ExtractD)
macro(Digit)
macro(LowerCase)
macro(ReinterpretS2HF)
macro(ReinterpretHF2S)
macro(UpperCase)
macro(Whitespace)
macro(VectorBox)
Expand Down
Loading

0 comments on commit f03fb4e

Please sign in to comment.