diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2022-01-31 13:57:47 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2022-01-31 13:58:00 +0000 |
commit | 7ec8fc29321c698af665072424bc02a87369c8ad (patch) | |
tree | 79a1bf71e19c03774844afb8d4563f283bdc1441 | |
parent | c703d77a61ac73402bb024558ea39326d17d25c5 (diff) |
[X86] combineAnd() - per-element simplification - call SimplifyDemandedBits using mask demanded bits if SimplifyDemandedVectorElts fails
We already call SimplifyDemandedVectorElts based on whether each vector mask element is zero or nonzero; this change extends that to also try SimplifyDemandedBits, using the demanded bits mask generated from the nonzero elements.
This also requires an additional TargetLowering::SimplifyDemandedBits wrapper that takes both DemandedBits and DemandedElts along with the DAGCombinerInfo.
4 files changed, 29 insertions, 4 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index bec191570594..94d845b39c14 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3485,6 +3485,12 @@ public: bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, DAGCombinerInfo &DCI) const; + /// Helper wrapper around SimplifyDemandedBits. + /// Adds Op back to the worklist upon success. + bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts, + DAGCombinerInfo &DCI) const; + /// More limited version of SimplifyDemandedBits that can be used to "look /// through" ops that don't contribute to the DemandedBits/DemandedElts - /// bitwise ops etc. diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index ba6cae00bc50..ff57725ba846 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -606,6 +606,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, } bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, + const APInt &DemandedElts, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + KnownBits Known; + + bool Simplified = + SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO); + if (Simplified) { + DCI.AddToWorklist(Op.getNode()); + DCI.CommitTargetLoweringOpt(TLO); + } + return Simplified; +} + +bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d4b0c13c0ffd..90753b5b4d33 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46907,14 
+46907,18 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits)) return false; + APInt DemandedBits = APInt::getZero(EltSizeInBits); APInt DemandedElts = APInt::getZero(NumElts); for (int I = 0; I != NumElts; ++I) - if (!EltBits[I].isZero()) + if (!EltBits[I].isZero()) { + DemandedBits |= EltBits[I]; DemandedElts.setBit(I); + } APInt KnownUndef, KnownZero; return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef, - KnownZero, DCI); + KnownZero, DCI) || + TLI.SimplifyDemandedBits(OtherOp, DemandedBits, DemandedElts, DCI); }; if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) { if (N->getOpcode() != ISD::DELETED_NODE) diff --git a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll index d751fc7ec002..608522e88814 100644 --- a/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/X86/vector_splat-const-shift-of-constmasked.ll @@ -3062,7 +3062,6 @@ define <2 x i64> @test_128_i64_x_2_140737488289792_mask_ashr_18(<2 x i64> %a0) { define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_1(<2 x i64> %a0) { ; X86-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE2-NEXT: psrad $1, %xmm0 ; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE2-NEXT: retl @@ -3086,7 +3085,6 @@ define <2 x i64> @test_128_i64_x_2_18446744065119617024_mask_ashr_1(<2 x i64> %a ; ; X64-SSE2-LABEL: test_128_i64_x_2_18446744065119617024_mask_ashr_1: ; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-SSE2-NEXT: psrad $1, %xmm0 ; X64-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-SSE2-NEXT: retq |