From 3f3d6095c742e4e60e99d96babd260b56b21d4d4 Mon Sep 17 00:00:00 2001 From: MITSUNARI Shigeo Date: Tue, 21 Nov 2023 16:56:44 +0900 Subject: [PATCH] disable NF support for rcl/rcr --- gen/gen_code.cpp | 20 ++++++++++---------- test/apx.cpp | 37 +++++++++++++++++++++++++++++++++++++ xbyak/xbyak.h | 6 ++++-- xbyak/xbyak_mnemonic.h | 24 ++++++++++++------------ 4 files changed, 63 insertions(+), 24 deletions(-) diff --git a/gen/gen_code.cpp b/gen/gen_code.cpp index bb1ce202..a0ed29b2 100644 --- a/gen/gen_code.cpp +++ b/gen/gen_code.cpp @@ -898,17 +898,17 @@ void put() { const struct Tbl { const char *name; - uint8_t ext; + uint8_t ext; // |8 means supporting NF=1 } tbl[] = { - { "rcl", 2 }, - { "rcr", 3 }, - { "rol", 0 }, - { "ror", 1 }, - { "sar", 7 }, - { "shl", 4 }, - { "shr", 5 }, - - { "sal", 4 }, + { "rcl", 2|0 }, + { "rcr", 3|0 }, + { "rol", 0|8 }, + { "ror", 1|8 }, + { "sar", 7|8 }, + { "shl", 4|8 }, + { "shr", 5|8 }, + + { "sal", 4|8 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl *p = &tbl[i]; diff --git a/test/apx.cpp b/test/apx.cpp index 16cd0a61..c2722319 100644 --- a/test/apx.cpp +++ b/test/apx.cpp @@ -1150,6 +1150,31 @@ CYBOZU_TEST_AUTO(shift_2op) rol(dword [r30], 0x7); rol(qword [r30], 0x9); + rcl(r16b, cl); + rcl(r16w, cl); + rcl(r16d, cl); + rcl(r16, cl); + rcl(r16b, 0x3); + rcl(r16w, 0x5); + rcl(r16d, 0x7); + rcl(r16, 0x9); + rcl(byte [r30], 0x3); + rcl(word [r30], 0x5); + rcl(dword [r30], 0x7); + rcl(qword [r30], 0x9); + + rcr(r16b, cl); + rcr(r16w, cl); + rcr(r16d, cl); + rcr(r16, cl); + rcr(r16b, 0x3); + rcr(r16w, 0x5); + rcr(r16d, 0x7); + rcr(r16, 0x9); + rcr(byte [r30], 0x3); + rcr(word [r30], 0x5); + rcr(dword [r30], 0x7); + rcr(qword [r30], 0x9); } } c; const uint8_t tbl[] = { @@ -1188,6 +1213,18 @@ CYBOZU_TEST_AUTO(shift_2op) 0xc1, 0xc0, 0x07, 0x62, 0xfc, 0xfc, 0x08, 0xc1, 0xc0, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x06, 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, 0x06, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x06, 0x07, 0x62, 0xdc, 0xfc, 
0x08, 0xc1, 0x06, 0x09, + // rcl + 0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xd0, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xd0, 0x62, 0xfc, 0x7c, 0x08, + 0xd3, 0xd0, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xd0, 0x62, 0xfc, 0x7c, 0x08, 0xc0, 0xd0, 0x03, 0x62, + 0xfc, 0x7d, 0x08, 0xc1, 0xd0, 0x05, 0x62, 0xfc, 0x7c, 0x08, 0xc1, 0xd0, 0x07, 0x62, 0xfc, 0xfc, + 0x08, 0xc1, 0xd0, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x16, 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, + 0x16, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x16, 0x07, 0x62, 0xdc, 0xfc, 0x08, 0xc1, 0x16, 0x09, + // rcr + 0x62, 0xfc, 0x7c, 0x08, 0xd2, 0xd8, 0x62, 0xfc, 0x7d, 0x08, 0xd3, 0xd8, 0x62, 0xfc, 0x7c, 0x08, + 0xd3, 0xd8, 0x62, 0xfc, 0xfc, 0x08, 0xd3, 0xd8, 0x62, 0xfc, 0x7c, 0x08, 0xc0, 0xd8, 0x03, 0x62, + 0xfc, 0x7d, 0x08, 0xc1, 0xd8, 0x05, 0x62, 0xfc, 0x7c, 0x08, 0xc1, 0xd8, 0x07, 0x62, 0xfc, 0xfc, + 0x08, 0xc1, 0xd8, 0x09, 0x62, 0xdc, 0x7c, 0x08, 0xc0, 0x1e, 0x03, 0x62, 0xdc, 0x7d, 0x08, 0xc1, + 0x1e, 0x05, 0x62, 0xdc, 0x7c, 0x08, 0xc1, 0x1e, 0x07, 0x62, 0xdc, 0xfc, 0x08, 0xc1, 0x1e, 0x09, }; const size_t n = sizeof(tbl); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak.h b/xbyak/xbyak.h index 49e3cefe..7e28e9e9 100644 --- a/xbyak/xbyak.h +++ b/xbyak/xbyak.h @@ -2215,13 +2215,15 @@ class CodeGenerator : public CodeArray { void opShift(const Operand& op, int imm, int ext) { verifyMemHasSize(op); - opRext(op, 0, ext, T_VEX|T_NF|T_CODE1_IF1, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), false, (imm != 1) ? 1 : 0); + uint64_t type = T_VEX|T_CODE1_IF1; if (ext & 8) type |= T_NF; + opRext(op, 0, ext&7, type, (0xC0 | ((imm == 1 ? 1 : 0) << 4)), false, (imm != 1) ? 
1 : 0); if (imm != 1) db(imm); } void opShift(const Operand& op, const Reg8& _cl, int ext) { if (_cl.getIdx() != Operand::CL) XBYAK_THROW(ERR_BAD_COMBINATION) - opRext(op, 0, ext, T_VEX|T_NF|T_CODE1_IF1, 0xD2); + uint64_t type = T_VEX|T_CODE1_IF1; if (ext & 8) type |= T_NF; + opRext(op, 0, ext&7, type, 0xD2); } // condR assumes that op.isREG() is true void opRO(const Reg& r, const Operand& op, uint64_t type, int code, bool condR = true, int immSize = 0) diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index ca58d114..9b3d4874 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -746,10 +746,10 @@ void repnz() { db(0xF2); } void repz() { db(0xF3); } void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } } void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } } -void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); } -void rol(const Operand& op, int imm) { opShift(op, imm, 0); } -void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); } -void ror(const Operand& op, int imm) { opShift(op, imm, 1); } +void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 8); } +void rol(const Operand& op, int imm) { opShift(op, imm, 8); } +void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 9); } +void ror(const Operand& op, int imm) { opShift(op, imm, 9); } void rorx(const Reg32e& r, const Operand& op, uint8_t imm) { opRRO(r, Reg32e(0, r.getBit()), op, T_0F3A|T_F2|T_VEX|T_MAP3, 0xF0, imm); } void roundpd(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A|T_YMM, 0x09, isXMM_XMMorMEM, imm); } void roundps(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, T_66|T_0F3A|T_YMM, 0x08, isXMM_XMMorMEM, imm); } @@ -758,10 +758,10 @@ void roundss(const Xmm& xmm, const Operand& op, int imm) { opSSE(xmm, op, T_66 | void rsqrtps(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F, 0x52, isXMM_XMMorMEM); } void rsqrtss(const Xmm& xmm, const 
Operand& op) { opSSE(xmm, op, T_0F | T_F3, 0x52, isXMM_XMMorMEM); } void sahf() { db(0x9E); } -void sal(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); } -void sal(const Operand& op, int imm) { opShift(op, imm, 4); } -void sar(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 7); } -void sar(const Operand& op, int imm) { opShift(op, imm, 7); } +void sal(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12); } +void sal(const Operand& op, int imm) { opShift(op, imm, 12); } +void sar(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 15); } +void sar(const Operand& op, int imm) { opShift(op, imm, 15); } void sarx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_VEX|T_F3|T_0F38, 0xf7); } void sbb(const Operand& op, uint32_t imm) { opOI(op, imm, 0x18, 3); } void sbb(const Operand& op1, const Operand& op2) { opRO_MR(op1, op2, 0x18); } @@ -809,15 +809,15 @@ void sha1rnds4(const Xmm& xmm, const Operand& op, uint8_t imm) { opSSE(xmm, op, void sha256msg1(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCC, isXMM_XMMorMEM, NONE); } void sha256msg2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCD, isXMM_XMMorMEM, NONE); } void sha256rnds2(const Xmm& xmm, const Operand& op) { opSSE(xmm, op, T_0F38, 0xCB, isXMM_XMMorMEM, NONE); } -void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 4); } -void shl(const Operand& op, int imm) { opShift(op, imm, 4); } +void shl(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 12); } +void shl(const Operand& op, int imm) { opShift(op, imm, 12); } void shld(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(Reg(), op, reg, 0, 0xA4, 0x24, &_cl); } void shld(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(Reg(), op, reg, imm, 0xA4, 0x24); } void shld(const Reg& d, const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(d, op, reg, 0, 0xA4, 0x24, &_cl); } void shld(const Reg& d, const Operand& op, const Reg& reg, uint8_t imm) { 
opShxd(d, op, reg, imm, 0xA4, 0x24); } void shlx(const Reg32e& r1, const Operand& op, const Reg32e& r2) { opRRO(r1, r2, op, T_VEX|T_66|T_0F38, 0xf7); } -void shr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 5); } -void shr(const Operand& op, int imm) { opShift(op, imm, 5); } +void shr(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 13); } +void shr(const Operand& op, int imm) { opShift(op, imm, 13); } void shrd(const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(Reg(), op, reg, 0, 0xAC, 0x2C, &_cl); } void shrd(const Operand& op, const Reg& reg, uint8_t imm) { opShxd(Reg(), op, reg, imm, 0xAC, 0x2C); } void shrd(const Reg& d, const Operand& op, const Reg& reg, const Reg8& _cl) { opShxd(d, op, reg, 0, 0xAC, 0x2C, &_cl); }