From a7dd06b0f066d5d4f295f1ea7b480870c561dd42 Mon Sep 17 00:00:00 2001 From: Nekotekina Date: Thu, 13 May 2021 16:58:05 +0300 Subject: [PATCH] X86: improve (V)PMADDWD detection In combineMulToPMADDWD, the optimization can sometimes still be applied when the upper 17 bits of both operands are sign bits rather than zero bits. For now, detect the case where both operands are SRA by 16 and replace each SRA with SRL. --- lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 09a32d1c4221..b955b9033fab 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -41659,6 +41659,22 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, return SDValue(); APInt Mask17 = APInt::getHighBitsSet(32, 17); + if (N0.getOpcode() == ISD::SRA && N1.getOpcode() == ISD::SRA) { + // If both arguments are sign-extended, try to replace sign extends + // with zero extends, which should qualify for the optimization. + // Otherwise just fallback to zero-extension check. + if (isa(N0.getOperand(1).getOperand(0)) && + N0.getOperand(1).getConstantOperandVal(0) == 16 && + isa(N1.getOperand(1).getOperand(0)) && + N1.getOperand(1).getConstantOperandVal(0) == 16) { + // Nullify mask to pass the following check + Mask17 = 0; + N0 = DAG.getNode(ISD::SRL, N0.getNode(), VT, N0.getOperand(0), + N0.getOperand(1)); + N1 = DAG.getNode(ISD::SRL, N1.getNode(), VT, N1.getOperand(0), + N1.getOperand(1)); + } + } if (!DAG.MaskedValueIsZero(N1, Mask17) || !DAG.MaskedValueIsZero(N0, Mask17)) return SDValue();