diff options
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 124 |
1 files changed, 19 insertions, 105 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d33888247bd2..9890b3e7820a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -870,7 +870,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::VECTOR_SPLICE); setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); - setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::CONCAT_VECTORS); setTargetDAGCombine(ISD::INSERT_SUBVECTOR); setTargetDAGCombine(ISD::STORE); @@ -1047,6 +1046,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, MVT::v4i32}) { + setOperationAction(ISD::AVGFLOORS, VT, Legal); + setOperationAction(ISD::AVGFLOORU, VT, Legal); + setOperationAction(ISD::AVGCEILS, VT, Legal); + setOperationAction(ISD::AVGCEILU, VT, Legal); setOperationAction(ISD::ABDS, VT, Legal); setOperationAction(ISD::ABDU, VT, Legal); } @@ -2096,10 +2099,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FCMLTz) MAKE_CASE(AArch64ISD::SADDV) MAKE_CASE(AArch64ISD::UADDV) - MAKE_CASE(AArch64ISD::SRHADD) - MAKE_CASE(AArch64ISD::URHADD) - MAKE_CASE(AArch64ISD::SHADD) - MAKE_CASE(AArch64ISD::UHADD) MAKE_CASE(AArch64ISD::SDOT) MAKE_CASE(AArch64ISD::UDOT) MAKE_CASE(AArch64ISD::SMINV) @@ -4371,9 +4370,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, IntNo == Intrinsic::aarch64_neon_shadd); bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd || IntNo == Intrinsic::aarch64_neon_urhadd); - unsigned Opcode = - IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD) - : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD); + unsigned Opcode = IsSignedAdd + ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS) + : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU); return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } @@ -14243,89 +14242,6 @@ static SDValue performANDCombine(SDNode *N, return SDValue(); } -// Attempt to form urhadd(OpA, OpB) from -// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1)) -// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)). -// The original form of the first expression is -// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the -// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)). -// Before this function is called the srl will have been lowered to -// AArch64ISD::VLSHR. -// This pass can also recognize signed variants of the patterns that use sign -// extension instead of zero extension and form a srhadd(OpA, OpB) or a -// shadd(OpA, OpB) from them. -static SDValue -performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - - // Since we are looking for a right shift by a constant value of 1 and we are - // operating on types at least 16 bits in length (sign/zero extended OpA and - // OpB, which are at least 8 bits), it follows that the truncate will always - // discard the shifted-in bit and therefore the right shift will be logical - // regardless of the signedness of OpA and OpB. - SDValue Shift = N->getOperand(0); - if (Shift.getOpcode() != AArch64ISD::VLSHR) - return SDValue(); - - // Is the right shift using an immediate value of 1? - uint64_t ShiftAmount = Shift.getConstantOperandVal(1); - if (ShiftAmount != 1) - return SDValue(); - - SDValue ExtendOpA, ExtendOpB; - SDValue ShiftOp0 = Shift.getOperand(0); - unsigned ShiftOp0Opc = ShiftOp0.getOpcode(); - if (ShiftOp0Opc == ISD::SUB) { - - SDValue Xor = ShiftOp0.getOperand(1); - if (Xor.getOpcode() != ISD::XOR) - return SDValue(); - - // Is the XOR using a constant amount of all ones in the right hand side? - uint64_t C; - if (!isAllConstantBuildVector(Xor.getOperand(1), C)) - return SDValue(); - - unsigned ElemSizeInBits = VT.getScalarSizeInBits(); - APInt CAsAPInt(ElemSizeInBits, C); - if (CAsAPInt != APInt::getAllOnes(ElemSizeInBits)) - return SDValue(); - - ExtendOpA = Xor.getOperand(0); - ExtendOpB = ShiftOp0.getOperand(0); - } else if (ShiftOp0Opc == ISD::ADD) { - ExtendOpA = ShiftOp0.getOperand(0); - ExtendOpB = ShiftOp0.getOperand(1); - } else - return SDValue(); - - unsigned ExtendOpAOpc = ExtendOpA.getOpcode(); - unsigned ExtendOpBOpc = ExtendOpB.getOpcode(); - if (!(ExtendOpAOpc == ExtendOpBOpc && - (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND))) - return SDValue(); - - // Is the result of the right shift being truncated to the same value type as - // the original operands, OpA and OpB? - SDValue OpA = ExtendOpA.getOperand(0); - SDValue OpB = ExtendOpB.getOperand(0); - EVT OpAVT = OpA.getValueType(); - assert(ExtendOpA.getValueType() == ExtendOpB.getValueType()); - if (!(VT == OpAVT && OpAVT == OpB.getValueType())) - return SDValue(); - - SDLoc DL(N); - bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND; - bool IsRHADD = ShiftOp0Opc == ISD::SUB; - unsigned HADDOpc = IsSignExtend - ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD) - : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD); - SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB); - - return ResultHADD; -} - static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) { switch (Opcode) { case ISD::FADD: @@ -14428,20 +14344,20 @@ static SDValue performConcatVectorsCombine(SDNode *N, if (DCI.isBeforeLegalizeOps()) return SDValue(); - // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted - // subvectors from the same original vectors. Combine these into a single - // [us]rhadd or [us]hadd that operates on the two original vectors. Example: - // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>), - // extract_subvector (v16i8 OpB, - // <0>))), - // (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>), - // extract_subvector (v16i8 OpB, - // <8>))))) + // Optimise concat_vectors of two [us]avgceils or [us]avgfloors that use + // extracted subvectors from the same original vectors. Combine these into a + // single avg that operates on the two original vectors. + // avgceil is the target independant name for rhadd, avgfloor is a hadd. + // Example: + // (concat_vectors (v8i8 (avgceils (extract_subvector (v16i8 OpA, <0>), + // extract_subvector (v16i8 OpB, <0>))), + // (v8i8 (avgceils (extract_subvector (v16i8 OpA, <8>), + // extract_subvector (v16i8 OpB, <8>))))) // -> - // (v16i8(urhadd(v16i8 OpA, v16i8 OpB))) + // (v16i8(avgceils(v16i8 OpA, v16i8 OpB))) if (N->getNumOperands() == 2 && N0Opc == N1Opc && - (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD || - N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) { + (N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS || + N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS)) { SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); SDValue N10 = N1->getOperand(0); @@ -18022,8 +17938,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performExtendCombine(N, DCI, DAG); case ISD::SIGN_EXTEND_INREG: return performSignExtendInRegCombine(N, DCI, DAG); - case ISD::TRUNCATE: - return performVectorTruncateCombine(N, DCI, DAG); case ISD::CONCAT_VECTORS: return performConcatVectorsCombine(N, DCI, DAG); case ISD::INSERT_SUBVECTOR: |