diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2022-01-31 11:36:03 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2022-01-31 11:55:10 +0000 |
commit | 48f45f6b25f086e5c949648160b0784370435f9f (patch) | |
tree | 5fc076c960ee55f8fa4d5b0540c78f02dbc621dd | |
parent | 0dcc8b86ee3819d4e0d01a3084926a182eee6981 (diff) |
[X86] Limit mul(x,x) knownbits tests with not undef/poison check
We can only assume bit[1] == zero if it's the only demanded bit or the source is not undef/poison
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/combine-mul.ll | 17 |
2 files changed, 15 insertions, 4 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 45f3005e8f57..240dcca654ae 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3082,6 +3082,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); bool SelfMultiply = Op.getOperand(0) == Op.getOperand(1); + SelfMultiply &= isGuaranteedNotToBeUndefOrPoison( + Op.getOperand(0), DemandedElts, false, Depth + 1); Known = KnownBits::mul(Known, Known2, SelfMultiply); break; } diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll index 57d85e1c1c9e..f0254e784cfc 100644 --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -366,12 +366,16 @@ define <2 x i64> @combine_mul_to_abs_v2i64(<2 x i64> %x) { define i64 @combine_mul_self_knownbits(i64 %x) { ; SSE-LABEL: combine_mul_self_knownbits: ; SSE: # %bb.0: -; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: imull %eax, %eax +; SSE-NEXT: andl $2, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: combine_mul_self_knownbits: ; AVX: # %bb.0: -; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: movq %rdi, %rax +; AVX-NEXT: imull %eax, %eax +; AVX-NEXT: andl $2, %eax ; AVX-NEXT: retq %1 = mul i64 %x, %x %2 = and i64 %1, 2 @@ -381,12 +385,15 @@ define i64 @combine_mul_self_knownbits(i64 %x) { define <4 x i32> @combine_mul_self_knownbits_vector(<4 x i32> %x) { ; SSE-LABEL: combine_mul_self_knownbits_vector: ; SSE: # %bb.0: -; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: pmulld %xmm0, %xmm0 +; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_mul_self_knownbits_vector: ; AVX: # %bb.0: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpmulld %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vpbroadcastd 
{{.*#+}} xmm1 = [2,2,2,2] +; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = mul <4 x i32> %x, %x %2 = and <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2> @@ -400,12 +407,14 @@ define i64 @combine_mul_self_demandedbits(i64 %x) { ; SSE: # %bb.0: ; SSE-NEXT: movq %rdi, %rax ; SSE-NEXT: imulq %rdi, %rax +; SSE-NEXT: andq $-3, %rax ; SSE-NEXT: retq ; ; AVX-LABEL: combine_mul_self_demandedbits: ; AVX: # %bb.0: ; AVX-NEXT: movq %rdi, %rax ; AVX-NEXT: imulq %rdi, %rax +; AVX-NEXT: andq $-3, %rax ; AVX-NEXT: retq %1 = mul i64 %x, %x %2 = and i64 %1, -3 |