From ddd7a550e4c8c4eea670fe39bdf0a3455764b091 Mon Sep 17 00:00:00 2001 From: Paulo Matos Date: Fri, 11 Oct 2024 16:15:40 +0200 Subject: [PATCH 1/4] Remove check on top 16bits Makes it uniform among all 3DNow tests instead of some checking and some don't. --- unittests/ASM/3DNow/0C.asm | 8 ++++---- unittests/ASM/3DNow/0D.asm | 8 ++++---- unittests/ASM/3DNow/1C.asm | 8 ++++---- unittests/ASM/3DNow/1D.asm | 8 ++++---- unittests/ASM/3DNow/86.asm | 6 +++--- unittests/ASM/3DNow/87.asm | 6 +++--- unittests/ASM/3DNow/8A.asm | 4 ++-- unittests/ASM/3DNow/8E.asm | 4 ++-- unittests/ASM/3DNow/90.asm | 8 ++++---- unittests/ASM/3DNow/96.asm | 6 +++--- unittests/ASM/3DNow/97.asm | 6 +++--- unittests/ASM/3DNow/9A.asm | 4 ++-- unittests/ASM/3DNow/9E.asm | 4 ++-- unittests/ASM/3DNow/A0.asm | 8 ++++---- unittests/ASM/3DNow/A4.asm | 8 ++++---- unittests/ASM/3DNow/A6.asm | 4 ++-- unittests/ASM/3DNow/A7.asm | 4 ++-- unittests/ASM/3DNow/AA.asm | 4 ++-- unittests/ASM/3DNow/AE.asm | 4 ++-- unittests/ASM/3DNow/B0.asm | 8 ++++---- unittests/ASM/3DNow/B4.asm | 4 ++-- unittests/ASM/3DNow/B6.asm | 4 ++-- unittests/ASM/3DNow/BB.asm | 4 ++-- 23 files changed, 66 insertions(+), 66 deletions(-) diff --git a/unittests/ASM/3DNow/0C.asm b/unittests/ASM/3DNow/0C.asm index 0caee7af78..8660bb353b 100644 --- a/unittests/ASM/3DNow/0C.asm +++ b/unittests/ASM/3DNow/0C.asm @@ -1,10 +1,10 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x3f800000bf800000", "0x0"], - "MM1": ["0x43000000c3000000", "0x0"], - "MM2": ["0xc700000046fffe00", "0x0"], - "MM3": ["0x0", "0x0"] + "MM0": "0x3f800000bf800000", + "MM1": "0x43000000c3000000", + "MM2": "0xc700000046fffe00", + "MM3": "0x0" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/0D.asm b/unittests/ASM/3DNow/0D.asm index 6bf49715e1..21878b7c9e 100644 --- a/unittests/ASM/3DNow/0D.asm +++ b/unittests/ASM/3DNow/0D.asm @@ -1,10 +1,10 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x3f800000bf800000", "0x0"], - "MM1": ["0x43000000c3000000", "0x0"], - "MM2": 
["0xbf8000003f800000", "0x0"], - "MM3": ["0x0", "0x0"] + "MM0": "0x3f800000bf800000", + "MM1": "0x43000000c3000000", + "MM2": "0xbf8000003f800000", + "MM3": "0x0" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/1C.asm b/unittests/ASM/3DNow/1C.asm index c1ff0750e5..3804745282 100644 --- a/unittests/ASM/3DNow/1C.asm +++ b/unittests/ASM/3DNow/1C.asm @@ -1,10 +1,10 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x00000001FFFFFFFF", "0x0"], - "MM1": ["0x00000080FFFFFF80", "0x0"], - "MM2": ["0xFFFF800000007FFF", "0x0"], - "MM3": ["0x0", "0x0"] + "MM0": "0x00000001FFFFFFFF", + "MM1": "0x00000080FFFFFF80", + "MM2": "0xFFFF800000007FFF", + "MM3": "0x0" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/1D.asm b/unittests/ASM/3DNow/1D.asm index 0a1f51c7f5..7249e00ac8 100644 --- a/unittests/ASM/3DNow/1D.asm +++ b/unittests/ASM/3DNow/1D.asm @@ -1,10 +1,10 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x00000001FFFFFFFF", "0x0"], - "MM1": ["0x00000080FFFFFF80", "0x0"], - "MM2": ["0xFFFFFFFF00000001", "0x0"], - "MM3": ["0x0", "0x0"] + "MM0": "0x00000001FFFFFFFF", + "MM1": "0x00000080FFFFFF80", + "MM2": "0xFFFFFFFF00000001", + "MM3": "0x0" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/86.asm b/unittests/ASM/3DNow/86.asm index 815ed5315f..86e593e711 100644 --- a/unittests/ASM/3DNow/86.asm +++ b/unittests/ASM/3DNow/86.asm @@ -1,9 +1,9 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x3f800000bf800000", "0x0"], - "MM1": ["0x3c000000bc000000", "0x0"], - "MM2": ["0xbf8000003f800000", "0x0"] + "MM0": "0x3f800000bf800000", + "MM1": "0x3c000000bc000000", + "MM2": "0xbf8000003f800000" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/87.asm b/unittests/ASM/3DNow/87.asm index f98fcac396..220c3fc44b 100644 --- a/unittests/ASM/3DNow/87.asm +++ b/unittests/ASM/3DNow/87.asm @@ -1,9 +1,9 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x3e8000003f800000", "0x0"], - "MM1": ["0x3e4ccccd3f000000", "0x0"], - "MM2": ["0x3f8000003eaaaaab", "0x0"] + "MM0": 
"0x3e8000003f800000", + "MM1": "0x3e4ccccd3f000000", + "MM2": "0x3f8000003eaaaaab" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/8A.asm b/unittests/ASM/3DNow/8A.asm index af9caf9755..4aca75b670 100644 --- a/unittests/ASM/3DNow/8A.asm +++ b/unittests/ASM/3DNow/8A.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x44000000c0000000", "0x0"], - "MM1": ["0x44800000c3800000", "0x0"] + "MM0": "0x44000000c0000000", + "MM1": "0x44800000c3800000" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/8E.asm b/unittests/ASM/3DNow/8E.asm index 2c14b91b97..5cfed20c4c 100644 --- a/unittests/ASM/3DNow/8E.asm +++ b/unittests/ASM/3DNow/8E.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x00000000c0000000", "0x0"], - "MM1": ["0x00000000c3800000", "0x0"] + "MM0": "0x00000000c0000000", + "MM1": "0x00000000c3800000" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/90.asm b/unittests/ASM/3DNow/90.asm index f75bb7f668..68fb00c0d1 100644 --- a/unittests/ASM/3DNow/90.asm +++ b/unittests/ASM/3DNow/90.asm @@ -1,10 +1,10 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x0", "0x0"], - "MM1": ["0xFFFFFFFF00000000", "0x0"], - "MM2": ["0xFFFFFFFFFFFFFFFF", "0x0"], - "MM3": ["0x00000000FFFFFFFF", "0x0"] + "MM0": "0x0", + "MM1": "0xFFFFFFFF00000000", + "MM2": "0xFFFFFFFFFFFFFFFF", + "MM3": "0x00000000FFFFFFFF" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/96.asm b/unittests/ASM/3DNow/96.asm index 0cb31ecb85..a54b45bdb5 100644 --- a/unittests/ASM/3DNow/96.asm +++ b/unittests/ASM/3DNow/96.asm @@ -1,9 +1,9 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0xbf800000bf800000", "0x0"], - "MM1": ["0xbc000000bc000000", "0x0"], - "MM2": ["0x3f8000003f800000", "0x0"] + "MM0": "0xbf800000bf800000", + "MM1": "0xbc000000bc000000", + "MM2": "0x3f8000003f800000" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/97.asm b/unittests/ASM/3DNow/97.asm index 6c00647dc7..d1bcf1dced 100644 --- a/unittests/ASM/3DNow/97.asm +++ 
b/unittests/ASM/3DNow/97.asm @@ -1,9 +1,9 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x3f8000003f800000", "0x0"], - "MM1": ["0x3f0000003f000000", "0x0"], - "MM2": ["0x3eaaaaab3eaaaaab", "0x0"] + "MM0": "0x3f8000003f800000", + "MM1": "0x3f0000003f000000", + "MM2": "0x3eaaaaab3eaaaaab" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/9A.asm b/unittests/ASM/3DNow/9A.asm index 2a2b6b9eee..c2cc84c27e 100644 --- a/unittests/ASM/3DNow/9A.asm +++ b/unittests/ASM/3DNow/9A.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x43808000c3808000", "0x0"], - "MM1": ["0x44200000c4200000", "0x0"] + "MM0": "0x43808000c3808000", + "MM1": "0x44200000c4200000" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/9E.asm b/unittests/ASM/3DNow/9E.asm index 718c26d5a2..f5e4fa8243 100644 --- a/unittests/ASM/3DNow/9E.asm +++ b/unittests/ASM/3DNow/9E.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0xc37f0000437f0000", "0x0"], - "MM1": ["0xc3c0000043c00000", "0x0"] + "MM0": "0xc37f0000437f0000", + "MM1": "0xc3c0000043c00000" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/A0.asm b/unittests/ASM/3DNow/A0.asm index 427594bb07..9f295db584 100644 --- a/unittests/ASM/3DNow/A0.asm +++ b/unittests/ASM/3DNow/A0.asm @@ -1,10 +1,10 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x0", "0x0"], - "MM1": ["0xFFFFFFFF00000000", "0x0"], - "MM2": ["0xFFFFFFFFFFFFFFFF", "0x0"], - "MM3": ["0x00000000FFFFFFFF", "0x0"] + "MM0": "0x0", + "MM1": "0xFFFFFFFF00000000", + "MM2": "0xFFFFFFFFFFFFFFFF", + "MM3": "0x00000000FFFFFFFF" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/A4.asm b/unittests/ASM/3DNow/A4.asm index fbc8dd11b7..bc21a51ffe 100644 --- a/unittests/ASM/3DNow/A4.asm +++ b/unittests/ASM/3DNow/A4.asm @@ -1,10 +1,10 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x3f8000003f800000", "0x0"], - "MM1": ["0x3f8000003f800000", "0x0"], - "MM2": ["0x00000000bf800000", "0x0"], - "MM3": ["0x3f8000003f800000", "0x0"] + "MM0": "0x3f8000003f800000", 
+ "MM1": "0x3f8000003f800000", + "MM2": "0x00000000bf800000", + "MM3": "0x3f8000003f800000" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/A6.asm b/unittests/ASM/3DNow/A6.asm index 6ea1b786ce..695fb57edb 100644 --- a/unittests/ASM/3DNow/A6.asm +++ b/unittests/ASM/3DNow/A6.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x9192939481828384", "0x0"], - "MM1": ["0xB1B2B3B4A1A2A3A4", "0x0"] + "MM0": "0x9192939481828384", + "MM1": "0xB1B2B3B4A1A2A3A4" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/A7.asm b/unittests/ASM/3DNow/A7.asm index 80e4034362..e2a8aa134e 100644 --- a/unittests/ASM/3DNow/A7.asm +++ b/unittests/ASM/3DNow/A7.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x9192939481828384", "0x0"], - "MM1": ["0xB1B2B3B4A1A2A3A4", "0x0"] + "MM0": "0x9192939481828384", + "MM1": "0xB1B2B3B4A1A2A3A4" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/AA.asm b/unittests/ASM/3DNow/AA.asm index 033b1a9afb..ffb0dacd2b 100644 --- a/unittests/ASM/3DNow/AA.asm +++ b/unittests/ASM/3DNow/AA.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0xc380800043808000", "0x0"], - "MM1": ["0xc420000044200000", "0x0"] + "MM0": "0xc380800043808000", + "MM1": "0xc420000044200000" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/AE.asm b/unittests/ASM/3DNow/AE.asm index 907f1cfb27..46dd13733f 100644 --- a/unittests/ASM/3DNow/AE.asm +++ b/unittests/ASM/3DNow/AE.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x0", "0x0"], - "MM1": ["0x0", "0x0"] + "MM0": "0x0", + "MM1": "0x0" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/B0.asm b/unittests/ASM/3DNow/B0.asm index caab89237f..40ea74b3f0 100644 --- a/unittests/ASM/3DNow/B0.asm +++ b/unittests/ASM/3DNow/B0.asm @@ -1,10 +1,10 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x0", "0x0"], - "MM1": ["0xFFFFFFFF00000000", "0x0"], - "MM2": ["0xFFFFFFFFFFFFFFFF", "0x0"], - "MM3": ["0x00000000FFFFFFFF", "0x0"] + "MM0": "0x0", + "MM1": 
"0xFFFFFFFF00000000", + "MM2": "0xFFFFFFFFFFFFFFFF", + "MM3": "0x00000000FFFFFFFF" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/B4.asm b/unittests/ASM/3DNow/B4.asm index f74fb1b389..f1517b5e58 100644 --- a/unittests/ASM/3DNow/B4.asm +++ b/unittests/ASM/3DNow/B4.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0xc3800000c3800000", "0x0"], - "MM1": ["0xc7800000c7800000", "0x0"] + "MM0": "0xc3800000c3800000", + "MM1": "0xc7800000c7800000" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/B6.asm b/unittests/ASM/3DNow/B6.asm index 9dcaec3669..1d11c77897 100644 --- a/unittests/ASM/3DNow/B6.asm +++ b/unittests/ASM/3DNow/B6.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x9192939481828384", "0x0"], - "MM1": ["0xB1B2B3B4A1A2A3A4", "0x0"] + "MM0": "0x9192939481828384", + "MM1": "0xB1B2B3B4A1A2A3A4" }, "HostFeatures": ["3DNOW"] } diff --git a/unittests/ASM/3DNow/BB.asm b/unittests/ASM/3DNow/BB.asm index 94b0f36e99..2911090e42 100644 --- a/unittests/ASM/3DNow/BB.asm +++ b/unittests/ASM/3DNow/BB.asm @@ -1,8 +1,8 @@ %ifdef CONFIG { "RegData": { - "MM0": ["0x8182838491929394", "0x0"], - "MM1": ["0xA1A2A3A4B1B2B3B4", "0x0"] + "MM0": "0x8182838491929394", + "MM1": "0xA1A2A3A4B1B2B3B4" }, "HostFeatures": ["3DNOW"] } From ad665d89c31e57492b9ae1b1a75d78054865743f Mon Sep 17 00:00:00 2001 From: Paulo Matos Date: Fri, 11 Oct 2024 16:15:06 +0200 Subject: [PATCH 2/4] Implements explicit state switch between X87 and MMX Fixes #3850 --- FEXCore/Scripts/json_ir_generator.py | 5 +++- .../Interface/Core/OpcodeDispatcher.cpp | 7 +++--- .../Source/Interface/Core/OpcodeDispatcher.h | 25 +++++++++++++------ .../Core/OpcodeDispatcher/Vector.cpp | 9 +++++++ FEXCore/Source/Interface/IR/IREmitter.h | 5 ++++ 5 files changed, 40 insertions(+), 11 deletions(-) diff --git a/FEXCore/Scripts/json_ir_generator.py b/FEXCore/Scripts/json_ir_generator.py index 64a8f68095..6c93b974bc 100755 --- a/FEXCore/Scripts/json_ir_generator.py +++ 
b/FEXCore/Scripts/json_ir_generator.py @@ -699,8 +699,12 @@ def print_ir_allocator_helpers(): # We gather the "has x87?" flag as we go. This saves the user from # having to keep track of whether they emitted any x87. + # Also changes the mmx state to X87. if op.LoweredX87: output_file.write("\t\tRecordX87Use();\n") + output_file.write( + "\t\tif(MMXState == MMXState_MMX) ChgStateMMX_X87();\n" + ) output_file.write("\t\tauto _Op = AllocateOp();\n".format(op.Name, op.Name.upper())) @@ -826,4 +830,3 @@ def print_ir_dispatcher_dispatch(): print_ir_dispatcher_dispatch() output_dispatch_file.close() - diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 076231bf00..a1d8eb8f2e 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -4406,10 +4406,11 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl if (gpr >= FEXCore::X86State::REG_MM_0) { LOGMAN_THROW_A_FMT(OpSize == 8, "full"); LOGMAN_THROW_A_FMT(Class == FPRClass, "MMX is floaty"); + LOGMAN_THROW_A_FMT(MMXState == MMXState_MMX, "Not in MMX State"); - // Partial store into bottom 64-bits, leave the upper bits unaffected. - // XXX: We actually should set the upper bits to all-1s? 
- StoreContextPartial(MM0Index + gpr - FEXCore::X86State::REG_MM_0, Src); + uint8_t Index = MM0Index + gpr - FEXCore::X86State::REG_MM_0; + StoreContext(Index, Src); + RegCache.Partial |= (1ull << (uint64_t)Index); } else if (gpr >= FEXCore::X86State::REG_XMM_0) { const auto gprIndex = gpr - X86State::REG_XMM_0; const auto VectorSize = GetGuestVectorLength(); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index b321f4f3d4..4c66d3ebac 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1243,6 +1243,10 @@ class OpDispatchBuilder final : public IREmitter { Bits &= ~NextBit; } else { _StoreContext(Size, Class, Value, Offset); + // If Partial and MMX register, then we need to store all 1s in bits 64-79 + if (Partial && Index >= MM0Index && Index <= MM7Index) { + _StoreContext(2, IR::GPRClass, _Constant(0xFFFF), Offset + 8); + } } } @@ -1964,12 +1968,6 @@ class OpDispatchBuilder final : public IREmitter { RegCache.Written |= Bit; } - void StoreContextPartial(uint8_t Index, Ref Value) { - StoreContext(Index, Value); - - RegCache.Partial |= (1ull << (uint64_t)Index); - } - void StoreRegister(uint8_t Reg, bool FPR, Ref Value) { StoreContext(Reg + (FPR ? 
FPR0Index : GPR0Index), Value); } @@ -2333,7 +2331,6 @@ class OpDispatchBuilder final : public IREmitter { } } - /** @} */ /** @} */ Ref GetX87Top(); @@ -2342,6 +2339,20 @@ class OpDispatchBuilder final : public IREmitter { Ref GetX87FTW_Helper(); void SetX87Top(Ref Value); + void ChgStateX87_MMX() override { + LOGMAN_THROW_A_FMT(MMXState == MMXState_X87, "Expected state to be x87"); + _StackForceSlow(); + SetX87Top(_Constant(0)); // top reset to zero + StoreContext(AbridgedFTWIndex, _Constant(0xFFFFUL)); // all valid + MMXState = MMXState_MMX; + } + + void ChgStateMMX_X87() override { + LOGMAN_THROW_A_FMT(MMXState == MMXState_MMX, "Expected state to be MMX"); + FlushRegisterCache(); + MMXState = MMXState_X87; + } + bool DestIsLockedMem(FEXCore::X86Tables::DecodedOp Op) const { return DestIsMem(Op) && (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK) != 0; } diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 4e1eff5088..50d8aaf310 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -698,6 +698,10 @@ void OpDispatchBuilder::MOVQOp(OpcodeArgs, VectorOpType VectorType) { } void OpDispatchBuilder::MOVQMMXOp(OpcodeArgs) { + // Switch to MMX state first; the 64-bit result is a partial store, and the MMX store path sets bits 64-79 to all 1s. + if (MMXState == MMXState_X87) { + ChgStateX87_MMX(); + } Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); StoreResult(FPRClass, Op, Src, 1); } @@ -2298,6 +2302,11 @@ void OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) { template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) { + // This function causes a change in MMX state from X87 to MMX + if (MMXState == MMXState_X87) { + ChgStateX87_MMX(); + } + // If loading a vector, use the full size, so we don't // unnecessarily zero extend the vector. 
Otherwise, if // memory, then we want to load the element size exactly. diff --git a/FEXCore/Source/Interface/IR/IREmitter.h b/FEXCore/Source/Interface/IR/IREmitter.h index 98d6790b40..5b0720e613 100644 --- a/FEXCore/Source/Interface/IR/IREmitter.h +++ b/FEXCore/Source/Interface/IR/IREmitter.h @@ -343,8 +343,13 @@ class IREmitter { return Ptr; } + // MMX State can be either MMX (for 64bit) or x87 FPU (for 80bit) + enum { MMXState_MMX, MMXState_X87 } MMXState = MMXState_MMX; + // Overriden by dispatcher, stubbed for IR tests virtual void RecordX87Use() {} + virtual void ChgStateX87_MMX() {} + virtual void ChgStateMMX_X87() {} virtual void SaveNZCV(IROps Op) {} Ref CurrentWriteCursor = nullptr; From 66ab7a8433c36957b39410ab0731dbf2038eeea2 Mon Sep 17 00:00:00 2001 From: Paulo Matos Date: Fri, 11 Oct 2024 16:16:41 +0200 Subject: [PATCH 3/4] ASM Test: Implements explicit state switch between X87 and MMX Tags is set to all valid in FEX, but in host it's set to all valid _and_ reinterpreted. Adding this to known failures in the host runner. 
--- unittests/ASM/Known_Failures_host | 2 + unittests/ASM/X87/X87MMXInteraction.asm | 53 +++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 unittests/ASM/X87/X87MMXInteraction.asm diff --git a/unittests/ASM/Known_Failures_host b/unittests/ASM/Known_Failures_host index 322f758bca..2158fda4e5 100644 --- a/unittests/ASM/Known_Failures_host +++ b/unittests/ASM/Known_Failures_host @@ -1 +1,3 @@ Test_X87/FXAM_Simple.asm +## Tag bits not completely modelled +Test_X87/X87MMXInteraction.asm \ No newline at end of file diff --git a/unittests/ASM/X87/X87MMXInteraction.asm b/unittests/ASM/X87/X87MMXInteraction.asm new file mode 100644 index 0000000000..3246ed3a6e --- /dev/null +++ b/unittests/ASM/X87/X87MMXInteraction.asm @@ -0,0 +1,53 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "0x0", + "RBX": "0x0", + "RCX": "0x8000000000000000", + "RDX": "0x3FFF", + "R8": "0xc90fdaa22168c235", + "R9": "0x4000", + "R10": "0xc90fdaa22168c235", + "R11": "0xFFFF" + } +} +%endif + +section .bss + x87env: resb 108 + +section .text +global _start +; Checks that after switching from x87 to MMX state, the +; values are correct and that an MMX register write sets the top 16 bits to +; all 1s. +_start: +finit ; enters x87 state + +fldpi ; goes in mm7 +fld1 ; goes in mm6 + +movq mm5, mm7 ; enters mmx state, so 1 is now in st6 and pi in st7, while st5 holds pi's significand with its top 16 bits set to all 1s. 
+o32 fnsave [rel x87env] + +; Top into eax +mov eax, dword [rel x87env + 4] +and eax, 0x3800 +shr eax, 11 ; top in eax + +; Tag into ebx +mov bx, word [rel x87env + 8] + +; st6 is 1 +mov rcx, qword [rel x87env + 88] +mov dx, word [rel x87env + 96] + +; st7 is pi +mov r8, qword [rel x87env + 98] +mov r9w, word [rel x87env + 106] + +; st5 is broken pi +mov r10, qword [rel x87env + 78] +mov r11w, word [rel x87env + 86] + +hlt From 2b486a4eeec85ab4ccef1d5af1f44fcc3861b638 Mon Sep 17 00:00:00 2001 From: Paulo Matos Date: Fri, 11 Oct 2024 16:33:41 +0200 Subject: [PATCH 4/4] instcountci: Implements explicit state switch between X87 and MMX --- unittests/InstructionCountCI/DDD.json | 150 +++-- .../FlagM/SecondaryGroup.json | 98 ++-- unittests/InstructionCountCI/H0F38.json | 90 ++- unittests/InstructionCountCI/H0F3A.json | 18 +- unittests/InstructionCountCI/RPRES/DDD.json | 24 +- unittests/InstructionCountCI/Secondary.json | 538 ++++++++++++------ .../InstructionCountCI/SecondaryGroup.json | 98 ++-- .../InstructionCountCI/Secondary_OpSize.json | 12 +- .../InstructionCountCI/Secondary_REPNE.json | 6 +- .../InstructionCountCI/Secondary_SVE128.json | 48 +- 10 files changed, 722 insertions(+), 360 deletions(-) diff --git a/unittests/InstructionCountCI/DDD.json b/unittests/InstructionCountCI/DDD.json index e3fcac148d..254aad64bc 100644 --- a/unittests/InstructionCountCI/DDD.json +++ b/unittests/InstructionCountCI/DDD.json @@ -16,7 +16,7 @@ ], "Instructions": { "pi2fw mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "0x0f 0x0f 0x0c" ], @@ -25,22 +25,26 @@ "uzp1 v2.4h, v2.4h, v2.4h", "sxtl v2.4s, v2.4h", "scvtf v2.2s, v2.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pi2fd mm0, mm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "0x0f 0x0f 0x0d" ], "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "scvtf v2.2s, v2.2s", - "str d2, [x28, #1040]" + 
"str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pf2iw mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "0x0f 0x0f 0x1c" ], @@ -49,22 +53,26 @@ "fcvtzs v2.2s, v2.2s", "uzp1 v2.4h, v2.4h, v2.4h", "sxtl v2.4s, v2.4h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pf2id mm0, mm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "0x0f 0x0f 0x1d" ], "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "fcvtzs v2.2s, v2.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrcpv mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "0x0f 0x0f 0x86" ], @@ -72,11 +80,13 @@ "ldr d2, [x28, #1056]", "fmov v0.4s, #0x70 (1.0000)", "fdiv v2.4s, v0.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrsqrtv mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "0x0f 0x0f 0x87" ], @@ -85,11 +95,13 @@ "fmov v0.4s, #0x70 (1.0000)", "fsqrt v1.4s, v2.4s", "fdiv v2.4s, v0.4s, v1.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfnacc mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0x0f 0x8a", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", @@ -97,11 +109,13 @@ "uzp1 v4.2s, v2.2s, v3.2s", "uzp2 v2.2s, v2.2s, v3.2s", "fsub v2.4s, v4.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfpnacc mm0, mm1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0x0f 0x8e", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", @@ -110,32 +124,38 @@ "fsub s2, s2, s4", "faddp v3.4s, v3.4s, v3.4s", "mov v2.s[1], v3.s[0]", - "str d2, [x28, #1040]" + 
"str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfcmpge mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x0f 0x90", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "fcmge v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfmin mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x0f 0x94", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "fcmgt v0.4s, v3.4s, v2.4s", "bif v2.16b, v3.16b, v0.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrcp mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "0x0f 0x0f 0x96" ], @@ -144,11 +164,13 @@ "fmov s0, #0x70 (1.0000)", "fdiv s2, s0, s2", "dup v2.2s, v2.s[0]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrsqrt mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "0x0f 0x0f 0x97" ], @@ -158,56 +180,68 @@ "fsqrt s1, s2", "fdiv s2, s0, s1", "dup v2.2s, v2.s[0]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfsub mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x0f 0x9a", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "fsub v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfadd mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x0f 0x9e", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "fadd v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] 
}, "pfcmpgt mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x0f 0xa0", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "fcmgt v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfmax mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x0f 0xa4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "fcmgt v0.4s, v3.4s, v2.4s", "bit v2.16b, v3.16b, v0.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrcpit1 mm0, mm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x0f 0xa6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrcpit1 mm0, mm0": { @@ -216,11 +250,13 @@ "ExpectedArm64ASM": [] }, "pfrsqit1 mm0, mm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x0f 0xa7", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrsqit1 mm0, mm0": { @@ -229,41 +265,49 @@ "ExpectedArm64ASM": [] }, "pfsubr mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x0f 0xaa", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "fsub v2.4s, v2.4s, v3.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfcmpeq mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x0f 0xb0", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "fcmeq v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfmul 
mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x0f 0xb4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "fmul v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrcpit2 mm0, mm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x0f 0xb6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrcpit2 mm0, mm0": { @@ -272,7 +316,7 @@ "ExpectedArm64ASM": [] }, "db 0x0f, 0x0f, 0xc1, 0xb7": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": [ "nasm doesn't support emitting this instruction", "pmulhrw mm0, mm1", @@ -285,26 +329,32 @@ "movi v3.4s, #0x80, lsl #8", "add v2.4s, v2.4s, v3.4s", "shrn v2.4h, v2.4s, #16", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pswapd mm0, mm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x0f 0xbb", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "rev64 v2.2s, v2.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pavgusb mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x0f 0xbf", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "urhadd v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] } } diff --git a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json index ca1416853d..8a31b5944c 100644 --- a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json @@ -811,23 +811,27 @@ "ExpectedArm64ASM": [] }, "psrlw mm0, 
15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ushr v2.8h, v2.8h, #15", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrlw mm0, 16": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrlw xmm0, 0": { @@ -859,23 +863,27 @@ "ExpectedArm64ASM": [] }, "psraw mm0, 15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "sshr v2.8h, v2.8h, #15", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psraw mm0, 16": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "sshr v2.8h, v2.8h, #15", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psraw xmm0, 0": { @@ -907,23 +915,27 @@ "ExpectedArm64ASM": [] }, "psllw mm0, 15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "shl v2.8h, v2.8h, #15", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllw mm0, 16": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllw xmm0, 0": { 
@@ -955,23 +967,27 @@ "ExpectedArm64ASM": [] }, "psrld mm0, 31": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ushr v2.4s, v2.4s, #31", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrld mm0, 32": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrld xmm0, 0": { @@ -1003,23 +1019,27 @@ "ExpectedArm64ASM": [] }, "psrad mm0, 31": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "sshr v2.4s, v2.4s, #31", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrad mm0, 32": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "sshr v2.4s, v2.4s, #31", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrad xmm0, 0": { @@ -1051,23 +1071,27 @@ "ExpectedArm64ASM": [] }, "pslld mm0, 31": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "shl v2.4s, v2.4s, #31", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pslld mm0, 32": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov 
w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pslld xmm0, 0": { @@ -1099,23 +1123,27 @@ "ExpectedArm64ASM": [] }, "psrlq mm0, 63": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ushr v2.2d, v2.2d, #63", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrlq mm0, 64": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrlq xmm0, 0": { @@ -1170,23 +1198,27 @@ "ExpectedArm64ASM": [] }, "psllq mm0, 63": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "shl v2.2d, v2.2d, #63", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllq mm0, 64": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllq xmm0, 0": { @@ -1514,12 +1546,12 @@ "mov w20, #0x37f", "strh w20, [x28, #1296]", "strb wzr, [x28, #1019]", + "strb wzr, [x28, #1298]", "strb wzr, [x28, #1016]", "strb wzr, [x28, #1017]", "strb wzr, [x28, #1018]", "strb wzr, [x28, #1022]", "movi v2.2d, #0x0", - "strb wzr, [x28, #1298]", "str q2, [x28, #1152]", "str q2, [x28, #1136]", "str q2, [x28, #1120]", diff --git a/unittests/InstructionCountCI/H0F38.json b/unittests/InstructionCountCI/H0F38.json index 8690369d85..d6bdcb649b 100644 --- a/unittests/InstructionCountCI/H0F38.json +++ 
b/unittests/InstructionCountCI/H0F38.json @@ -13,7 +13,7 @@ }, "Instructions": { "pshufb mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "NP 0x0f 0x38 0x00" ], @@ -23,7 +23,9 @@ "movi v4.16b, #0x87", "and v3.16b, v3.16b, v4.16b", "tbl v2.8b, {v2.16b}, v3.8b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pshufb xmm0, xmm1": { @@ -38,7 +40,7 @@ ] }, "phaddw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "NP 0x0f 0x38 0x01" ], @@ -46,7 +48,9 @@ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "addp v2.4h, v3.4h, v2.4h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "phaddw xmm0, xmm1": { @@ -59,7 +63,7 @@ ] }, "phaddd mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "NP 0x0f 0x38 0x02" ], @@ -67,7 +71,9 @@ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "addp v2.2s, v3.2s, v2.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "phaddd xmm0, xmm1": { @@ -80,7 +86,7 @@ ] }, "phaddsw mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "NP 0x0f 0x38 0x03" ], @@ -90,7 +96,9 @@ "uzp1 v4.4h, v2.4h, v3.4h", "uzp2 v2.4h, v2.4h, v3.4h", "sqadd v2.8h, v4.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "phaddsw xmm0, xmm1": { @@ -105,7 +113,7 @@ ] }, "pmaddubsw mm0, mm1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": [ "NP 0x0f 0x38 0x04" ], @@ -118,7 +126,9 @@ "smull2 v2.4s, v2.8h, v3.8h", "addp v2.4s, v4.4s, v2.4s", "sqxtn v2.4h, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pmaddubsw xmm0, xmm1": { @@ -143,7 +153,7 @@ ] }, "phsubw mm0, mm1": { - 
"ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "NP 0x0f 0x38 0x05" ], @@ -153,7 +163,9 @@ "uzp1 v4.4h, v2.4h, v3.4h", "uzp2 v2.4h, v2.4h, v3.4h", "sub v2.8h, v4.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "phsubw xmm0, xmm1": { @@ -168,7 +180,7 @@ ] }, "phsubd mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "NP 0x0f 0x38 0x06" ], @@ -178,7 +190,9 @@ "uzp1 v4.2s, v2.2s, v3.2s", "uzp2 v2.2s, v2.2s, v3.2s", "sub v2.4s, v4.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "phsubd xmm0, xmm1": { @@ -193,7 +207,7 @@ ] }, "phsubsw mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "NP 0x0f 0x38 0x07" ], @@ -203,7 +217,9 @@ "uzp1 v4.4h, v2.4h, v3.4h", "uzp2 v2.4h, v2.4h, v3.4h", "sqsub v2.8h, v4.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "phsubsw xmm0, xmm1": { @@ -218,7 +234,7 @@ ] }, "psignb mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "NP 0x0f 0x38 0x08" ], @@ -228,7 +244,9 @@ "sqshl v2.8b, v2.8b, #7", "srshr v2.8b, v2.8b, #7", "mul v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psignb xmm0, xmm1": { @@ -243,7 +261,7 @@ ] }, "psignw mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "NP 0x0f 0x38 0x09" ], @@ -253,7 +271,9 @@ "sqshl v2.4h, v2.4h, #15", "srshr v2.4h, v2.4h, #15", "mul v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psignw xmm0, xmm1": { @@ -268,7 +288,7 @@ ] }, "psignd mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "NP 0x0f 0x38 0x0a" ], @@ 
-278,7 +298,9 @@ "sqshl v2.2s, v2.2s, #31", "srshr v2.2s, v2.2s, #31", "mul v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psignd xmm0, xmm1": { @@ -293,7 +315,7 @@ ] }, "pmulhrsw mm0, mm1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": [ "Might be able to use sqdmulh", "NP 0x0f 0x38 0x0b" @@ -306,7 +328,9 @@ "movi v3.4s, #0x1, lsl #0", "add v2.4s, v2.4s, v3.4s", "shrn v2.4h, v2.4s, #1", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pmulhrsw xmm0, xmm1": { @@ -412,14 +436,16 @@ ] }, "pabsb mm0, mm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "NP 0x0f 0x38 0x1c" ], "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "abs v2.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pabsb xmm0, xmm1": { @@ -432,14 +458,16 @@ ] }, "pabsw mm0, mm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "NP 0x0f 0x38 0x1d" ], "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "abs v2.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pabsw xmm0, xmm1": { @@ -452,14 +480,16 @@ ] }, "pabsd mm0, mm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "NP 0x0f 0x38 0x1e" ], "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "abs v2.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pabsd xmm0, xmm1": { diff --git a/unittests/InstructionCountCI/H0F3A.json b/unittests/InstructionCountCI/H0F3A.json index c938b4d59e..f34c72d31e 100644 --- a/unittests/InstructionCountCI/H0F3A.json +++ b/unittests/InstructionCountCI/H0F3A.json @@ -15,17 +15,19 @@ ], "Instructions": { "palignr mm0, mm1, 0": { - "ExpectedInstructionCount": 2, + 
"ExpectedInstructionCount": 4, "Comment": [ "NP 0x0f 0x3a 0x0f" ], "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "palignr mm0, mm1, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "NP 0x0f 0x3a 0x0f" ], @@ -33,17 +35,21 @@ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "ext v2.8b, v2.8b, v3.8b, #1", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "palignr mm0, mm1, 255": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "NP 0x0f 0x3a 0x0f" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "roundps xmm0, xmm1, 00000000b": { diff --git a/unittests/InstructionCountCI/RPRES/DDD.json b/unittests/InstructionCountCI/RPRES/DDD.json index 3f9c91ddd2..6166d2aa9f 100644 --- a/unittests/InstructionCountCI/RPRES/DDD.json +++ b/unittests/InstructionCountCI/RPRES/DDD.json @@ -12,29 +12,33 @@ }, "Instructions": { "pfrcpv mm0, mm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "0x0f 0x0f 0x86" ], "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "frecpe v2.2s, v2.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrsqrtv mm0, mm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "0x0f 0x0f 0x87" ], "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "frsqrte v2.2s, v2.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pfrcp mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "0x0f 0x0f 0x96" ], @@ -42,11 +46,13 @@ "ldr d2, [x28, #1056]", "frecpe s2, s2", "dup v2.2s, v2.s[0]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, 
#0xffff", + "strh w20, [x28, #1048]" ] }, "pfrsqrt mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "0x0f 0x0f 0x97" ], @@ -54,7 +60,9 @@ "ldr d2, [x28, #1056]", "frsqrte s2, s2", "dup v2.2s, v2.s[0]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] } } diff --git a/unittests/InstructionCountCI/Secondary.json b/unittests/InstructionCountCI/Secondary.json index 92cbb0127f..ee8006df55 100644 --- a/unittests/InstructionCountCI/Secondary.json +++ b/unittests/InstructionCountCI/Secondary.json @@ -168,39 +168,49 @@ ] }, "cvttps2pi mm0, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x2c", "ExpectedArm64ASM": [ + "strb wzr, [x28, #1019]", + "mov w20, #0xffff", "ldr d2, [x4]", "fcvtzs v2.2s, v2.2s", - "str d2, [x28, #1040]" + "strb w20, [x28, #1298]", + "str d2, [x28, #1040]", + "strh w20, [x28, #1048]" ] }, "cvttps2pi mm0, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x2c", "ExpectedArm64ASM": [ "fcvtzs v2.2s, v16.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "cvtps2pi mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x2d", "ExpectedArm64ASM": [ "ldr d2, [x4]", "frinti v2.2s, v2.2s", "fcvtzs v2.2s, v2.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "cvtps2pi mm0, xmm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x2d", "ExpectedArm64ASM": [ "frinti v2.2s, v16.2s", "fcvtzs v2.2s, v2.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "ucomiss xmm0, xmm1": { @@ -775,324 +785,390 @@ ] }, "punpcklbw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x60", "ExpectedArm64ASM": 
[ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "zip1 v2.8b, v2.8b, v3.8b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "punpcklbw mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x60", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x4]", "zip1 v2.8b, v2.8b, v3.8b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "punpcklwd mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x61", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "zip1 v2.4h, v2.4h, v3.4h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "punpcklwd mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x61", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x4]", "zip1 v2.4h, v2.4h, v3.4h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "punpckldq mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x62", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "zip1 v2.2s, v2.2s, v3.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "punpckldq mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x62", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x4]", "zip1 v2.2s, v2.2s, v3.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "packsswb mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x63", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "zip1 v2.2d, v2.2d, v3.2d", "sqxtn v2.8b, v2.8h", - "str d2, [x28, 
#1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "packsswb mm0, [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x63", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x4]", "zip1 v2.2d, v2.2d, v3.2d", "sqxtn v2.8b, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "packsswb mm0, mm0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x63", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "zip1 v2.2d, v2.2d, v2.2d", "sqxtn v2.8b, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pcmpgtb mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x64", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "cmgt v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pcmpgtw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x65", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "cmgt v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pcmpgtd mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x66", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "cmgt v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "punpckhbw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x68", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "zip2 v2.8b, v2.8b, v3.8b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, 
"punpckhbw mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x68", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x4]", "zip2 v2.8b, v2.8b, v3.8b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "punpckhwd mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x69", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "zip2 v2.4h, v2.4h, v3.4h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "punpckhwd mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x69", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x4]", "zip2 v2.4h, v2.4h, v3.4h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "punpckhdq mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x6a", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "zip2 v2.2s, v2.2s, v3.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "punpckhdq mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x6a", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x4]", "zip2 v2.2s, v2.2s, v3.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "packssdw mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x6b", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "zip1 v2.2d, v2.2d, v3.2d", "sqxtn v2.4h, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "movd mm0, eax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, 
"Comment": "0x0f 0x6e", "ExpectedArm64ASM": [ "fmov s2, w4", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "movd mm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x6e", "ExpectedArm64ASM": [ "ldr s2, [x4]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "movq mm0, mm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x6f", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "movq mm0, mm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x6f", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "movq mm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x6f", "ExpectedArm64ASM": [ "ldr d2, [x4]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pshufw mm0, mm1, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "dup v2.4h, v2.h[0]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pshufw mm0, [rax], 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x4]", "dup v2.4h, v2.h[0]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pshufw mm0, mm1, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr x0, [x28, #1976]", "ldr d3, [x0, #16]", "tbl v2.8b, {v2.16b}, v3.8b", - 
"str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pshufw mm0, [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x4]", "ldr x0, [x28, #1976]", "ldr d3, [x0, #16]", "tbl v2.8b, {v2.16b}, v3.8b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pshufw mm0, mm1, 0xff": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "dup v2.4h, v2.h[3]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pshufw mm0, [rax], 0xff": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x4]", "dup v2.4h, v2.h[3]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pcmpeqb mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x74", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "cmeq v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pcmpeqw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x75", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "cmeq v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pcmpeqd mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x76", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "cmeq v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "emms": { @@ -1119,15 +1195,19 @@ ] }, 
"db 0x0f, 0x7f, 0xc1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "movq mm0, mm1", "Manual encoded since nasm would encode 0x6f version", "0x0f 0x7f" ], "ExpectedArm64ASM": [ + "strb wzr, [x28, #1019]", + "mov w20, #0xffff", "ldr d2, [x28, #1040]", - "str d2, [x28, #1056]" + "strb w20, [x28, #1298]", + "str d2, [x28, #1056]", + "strh w20, [x28, #1064]" ] }, "movq [rax], mm0": { @@ -2707,93 +2787,113 @@ ] }, "pinsrw mm0, eax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "mov v2.h[0], w4", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pinsrw mm0, eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "mov v2.h[1], w4", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pinsrw mm0, eax, 2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "mov v2.h[2], w4", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pinsrw mm0, eax, 3": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "mov v2.h[3], w4", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pinsrw mm0, eax, 4": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "mov v2.h[0], w4", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pinsrw mm0, [rax], 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", 
"ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ld1 {v2.h}[0], [x4]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pinsrw mm0, [rax], 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ld1 {v2.h}[1], [x4]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pinsrw mm0, [rax], 2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ld1 {v2.h}[2], [x4]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pinsrw mm0, [rax], 3": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ld1 {v2.h}[3], [x4]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pinsrw mm0, [rax], 4": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ld1 {v2.h}[0], [x4]", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pextrw eax, mm0, 0": { @@ -3367,7 +3467,7 @@ ] }, "psrlw mm0, mm1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xd1", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", @@ -3377,11 +3477,13 @@ "dup v0.8h, v0.h[0]", "neg v0.8h, v0.8h", "ushl v2.8h, v2.8h, v0.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrld mm0, mm1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xd2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", @@ -3391,11 +3493,13 @@ "dup v0.4s, v0.s[0]", "neg v0.4s, v0.4s", "ushl v2.4s, v2.4s, v0.4s", - "str 
d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrlq mm0, mm1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xd3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", @@ -3405,27 +3509,33 @@ "dup v0.2d, v0.d[0]", "neg v0.2d, v0.2d", "ushl v2.2d, v2.2d, v0.2d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "paddq mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xd4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "add v2.2d, v3.2d, v2.2d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pmullw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xd5", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "mul v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pmovmskb eax, mm0": { @@ -3443,97 +3553,115 @@ ] }, "psubusb mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xd8", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "uqsub v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psubusw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xd9", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "uqsub v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pminub mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xda", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "umin v2.16b, v3.16b, v2.16b", - "str d2, [x28, 
#1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pand mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xdb", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "and v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "paddusb mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xdc", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "uqadd v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "paddusw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xdd", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "uqadd v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pmaxub mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xde", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "umax v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pandn mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xdf", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "bic v2.16b, v2.16b, v3.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pavgb mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xe0", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "urhadd v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psraw mm0, 
mm1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xe1", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", @@ -3543,11 +3671,13 @@ "dup v0.8h, v0.h[0]", "neg v0.8h, v0.8h", "sshl v2.8h, v2.8h, v0.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrad mm0, mm1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xe2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", @@ -3557,39 +3687,47 @@ "dup v0.4s, v0.s[0]", "neg v0.4s, v0.4s", "sshl v2.4s, v2.4s, v0.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pavgw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xe3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "urhadd v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pmulhuw mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xe4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "umull v2.4s, v2.4h, v3.4h", "shrn v2.4h, v2.4s, #16", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pmulhw mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xe5", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "smull v2.4s, v2.4h, v3.4h", "shrn v2.4h, v2.4s, #16", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "movntq [rax], mm0": { @@ -3601,95 +3739,113 @@ ] }, "psubsb mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xe8", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "sqsub v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, 
[x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psubsw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xe9", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "sqsub v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pminsw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xea", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "smin v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "por mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xeb", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "orr v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "paddsb mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xec", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "sqadd v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "paddsw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xed", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "sqadd v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pmaxsw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xee", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "smax v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pxor mm0, mm1": { - 
"ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xef", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "eor v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pxor mm0, mm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xef", "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllw mm0, mm1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xf1", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", @@ -3698,11 +3854,13 @@ "ushr d0, d0, #57", "dup v0.8h, v0.h[0]", "ushl v2.8h, v2.8h, v0.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pslld mm0, mm1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xf2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", @@ -3711,11 +3869,13 @@ "ushr d0, d0, #57", "dup v0.4s, v0.s[0]", "ushl v2.4s, v2.4s, v0.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllq mm0, mm1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xf3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", @@ -3724,39 +3884,47 @@ "ushr d0, d0, #57", "dup v0.2d, v0.d[0]", "ushl v2.2d, v2.2d, v0.2d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pmuludq mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xf4", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "umull v2.2d, v2.2s, v3.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pmaddwd mm0, mm1": { - "ExpectedInstructionCount": 5, + 
"ExpectedInstructionCount": 7, "Comment": "0x0f 0xf5", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "smull v2.4s, v2.4h, v3.4h", "addp v2.4s, v2.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psadbw mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xf6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "uabdl v2.8h, v2.8b, v3.8b", "addv h2, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "maskmovq mm0, mm1": { @@ -3772,73 +3940,87 @@ ] }, "psubb mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xf8", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "sub v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psubw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xf9", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "sub v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psubd mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xfa", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "sub v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psubq mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xfb", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "sub v2.2d, v3.2d, v2.2d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "paddb mm0, mm1": { - "ExpectedInstructionCount": 4, + 
"ExpectedInstructionCount": 6, "Comment": "0x0f 0xfc", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "add v2.16b, v3.16b, v2.16b", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "paddw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xfd", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "add v2.8h, v3.8h, v2.8h", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "paddd mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xfe", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", "ldr d3, [x28, #1040]", "add v2.4s, v3.4s, v2.4s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] } } diff --git a/unittests/InstructionCountCI/SecondaryGroup.json b/unittests/InstructionCountCI/SecondaryGroup.json index 778f26b6b0..7e5f0bc232 100644 --- a/unittests/InstructionCountCI/SecondaryGroup.json +++ b/unittests/InstructionCountCI/SecondaryGroup.json @@ -1001,23 +1001,27 @@ "ExpectedArm64ASM": [] }, "psrlw mm0, 15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ushr v2.8h, v2.8h, #15", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrlw mm0, 16": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrlw xmm0, 0": { @@ -1049,23 +1053,27 @@ "ExpectedArm64ASM": [] }, "psraw mm0, 15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": 
"GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "sshr v2.8h, v2.8h, #15", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psraw mm0, 16": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "sshr v2.8h, v2.8h, #15", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psraw xmm0, 0": { @@ -1097,23 +1105,27 @@ "ExpectedArm64ASM": [] }, "psllw mm0, 15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "shl v2.8h, v2.8h, #15", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllw mm0, 16": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllw xmm0, 0": { @@ -1145,23 +1157,27 @@ "ExpectedArm64ASM": [] }, "psrld mm0, 31": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ushr v2.4s, v2.4s, #31", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrld mm0, 32": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrld xmm0, 0": { @@ -1193,23 +1209,27 @@ "ExpectedArm64ASM": [] }, "psrad mm0, 31": { - 
"ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "sshr v2.4s, v2.4s, #31", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrad mm0, 32": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "sshr v2.4s, v2.4s, #31", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrad xmm0, 0": { @@ -1241,23 +1261,27 @@ "ExpectedArm64ASM": [] }, "pslld mm0, 31": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "shl v2.4s, v2.4s, #31", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pslld mm0, 32": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pslld xmm0, 0": { @@ -1289,23 +1313,27 @@ "ExpectedArm64ASM": [] }, "psrlq mm0, 63": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ushr v2.2d, v2.2d, #63", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrlq mm0, 64": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrlq xmm0, 0": { @@ 
-1360,23 +1388,27 @@ "ExpectedArm64ASM": [] }, "psllq mm0, 63": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "shl v2.2d, v2.2d, #63", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllq mm0, 64": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Type": "MMX", "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "movi v2.2d, #0x0", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllq xmm0, 0": { @@ -1704,12 +1736,12 @@ "mov w20, #0x37f", "strh w20, [x28, #1296]", "strb wzr, [x28, #1019]", + "strb wzr, [x28, #1298]", "strb wzr, [x28, #1016]", "strb wzr, [x28, #1017]", "strb wzr, [x28, #1018]", "strb wzr, [x28, #1022]", "movi v2.2d, #0x0", - "strb wzr, [x28, #1298]", "str q2, [x28, #1152]", "str q2, [x28, #1136]", "str q2, [x28, #1120]", diff --git a/unittests/InstructionCountCI/Secondary_OpSize.json b/unittests/InstructionCountCI/Secondary_OpSize.json index fb7baad182..9f44d8acf9 100644 --- a/unittests/InstructionCountCI/Secondary_OpSize.json +++ b/unittests/InstructionCountCI/Secondary_OpSize.json @@ -123,22 +123,26 @@ ] }, "cvttpd2pi mm0, xmm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x2c", "ExpectedArm64ASM": [ "fcvtn v2.2s, v16.2d", "fcvtzs v2.2s, v2.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "cvtpd2pi mm0, xmm0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0x2d", "ExpectedArm64ASM": [ "fcvtn v2.2s, v16.2d", "frinti v2.2s, v2.2s", "fcvtzs v2.2s, v2.2s", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "ucomisd xmm0, xmm1": { diff --git 
a/unittests/InstructionCountCI/Secondary_REPNE.json b/unittests/InstructionCountCI/Secondary_REPNE.json index 3d775aabd9..5a3bcbdc28 100644 --- a/unittests/InstructionCountCI/Secondary_REPNE.json +++ b/unittests/InstructionCountCI/Secondary_REPNE.json @@ -459,10 +459,12 @@ ] }, "movdq2q mm0, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0xd6", "ExpectedArm64ASM": [ - "str d16, [x28, #1040]" + "str d16, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "cvtpd2dq xmm0, xmm1": { diff --git a/unittests/InstructionCountCI/Secondary_SVE128.json b/unittests/InstructionCountCI/Secondary_SVE128.json index 75c5f97c28..23f5a600d1 100644 --- a/unittests/InstructionCountCI/Secondary_SVE128.json +++ b/unittests/InstructionCountCI/Secondary_SVE128.json @@ -33,83 +33,99 @@ ] }, "psrlw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xd1", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "lsr z2.h, p6/m, z2.h, z3.d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrld mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xd2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "lsr z2.s, p6/m, z2.s, z3.d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrlq mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xd3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "lsr z2.d, p6/m, z2.d, z3.d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psraw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xe1", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "asr z2.h, p6/m, z2.h, z3.d", - 
"str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psrad mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xe2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "asr z2.s, p6/m, z2.s, z3.d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xf1", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "lsl z2.h, p6/m, z2.h, z3.d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "pslld mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xf2", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "lsl z2.s, p6/m, z2.s, z3.d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] }, "psllq mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xf3", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", "ldr d3, [x28, #1056]", "lsl z2.d, p6/m, z2.d, z3.d", - "str d2, [x28, #1040]" + "str d2, [x28, #1040]", + "mov w20, #0xffff", + "strh w20, [x28, #1048]" ] } }