summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Nikita Popov <npopov@redhat.com> 2022-02-24 10:09:49 +0100
committer: Tom Stellard <tstellar@redhat.com> 2022-03-07 21:02:28 -0800
commit: 67555104d23aaef9b4ce4995ccb98b2ba9aff07d (patch)
tree: 4df088de19c1adbf84b4dd3ca40d003eb3755708
parent: 1e4fd59253c60c78a222f85c42501302142a3586 (diff)
[MachineSink] Disable if there are any irreducible cycles
This is an alternative to D120330: it disables MachineSink entirely for functions that contain irreducible cycles. This both avoids the correctness problem and ensures we don't perform non-profitable sinks into cycles. At the same time, it may also disable profitable sinks in the same function. This can be made more precise by using MachineCycleInfo in the future.

Fixes https://github.com/llvm/llvm-project/issues/53990.

Differential Revision: https://reviews.llvm.org/D120800

(cherry picked from commit 6fde0439512580df793f3f48f95757b47de40d2b)
-rw-r--r-- llvm/lib/CodeGen/MachineSink.cpp 12
-rw-r--r-- llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll 22
-rw-r--r-- llvm/test/CodeGen/X86/pr38795.ll 93
-rw-r--r-- llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll 9
-rw-r--r-- llvm/test/CodeGen/X86/x86-shrink-wrapping.ll 36
5 files changed, 87 insertions, 85 deletions
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 0dbbc218e946..bc03776bde19 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -18,12 +18,14 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -429,6 +431,16 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
RegClassInfo.runOnMachineFunction(MF);
+ // MachineSink currently uses MachineLoopInfo, which only recognizes natural
+ // loops. As such, we could sink instructions into irreducible cycles, which
+ // would be non-profitable.
+ // WARNING: The current implementation of hasStoreBetween() is incorrect for
+ // sinking into irreducible cycles (PR53990), this bailout is currently
+ // necessary for correctness, not just profitability.
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI))
+ return false;
+
bool EverMadeChange = false;
while (true) {
diff --git a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll
index 024b6c608aba..f93e181d157c 100644
--- a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll
+++ b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll
@@ -24,7 +24,7 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind {
; CHECK-NEXT: movq %r15, %rdi
; CHECK-NEXT: callq l
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: jne .LBB0_10
+; CHECK-NEXT: jne .LBB0_9
; CHECK-NEXT: # %bb.1: # %if.end
; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: cmpl $0, e(%rip)
@@ -44,21 +44,19 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind {
; CHECK-NEXT: callq i
; CHECK-NEXT: movl %eax, %ebp
; CHECK-NEXT: orl %r14d, %ebp
-; CHECK-NEXT: testl %r13d, %r13d
-; CHECK-NEXT: je .LBB0_6
-; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: andl $4, %ebx
-; CHECK-NEXT: jmp .LBB0_3
-; CHECK-NEXT: .LBB0_6: # %if.end12
+; CHECK-NEXT: testl %r13d, %r13d
+; CHECK-NEXT: jne .LBB0_3
+; CHECK-NEXT: # %bb.5: # %if.end12
; CHECK-NEXT: testl %ebp, %ebp
-; CHECK-NEXT: je .LBB0_9
-; CHECK-NEXT: # %bb.7: # %if.then14
+; CHECK-NEXT: je .LBB0_8
+; CHECK-NEXT: # %bb.6: # %if.then14
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: jmp .LBB0_10
+; CHECK-NEXT: jmp .LBB0_9
; CHECK-NEXT: .Ltmp0: # Block address taken
-; CHECK-NEXT: # %bb.8: # %if.then20.critedge
+; CHECK-NEXT: # %bb.7: # %if.then20.critedge
; CHECK-NEXT: movl j(%rip), %edi
; CHECK-NEXT: movslq %eax, %rcx
; CHECK-NEXT: movl $1, %esi
@@ -71,9 +69,9 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind {
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: jmp k # TAILCALL
-; CHECK-NEXT: .LBB0_9: # %if.else
+; CHECK-NEXT: .LBB0_8: # %if.else
; CHECK-NEXT: incq 0
-; CHECK-NEXT: .LBB0_10: # %cleanup
+; CHECK-NEXT: .LBB0_9: # %cleanup
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r12
diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll
index d805dcad8b6e..b526e4f471b1 100644
--- a/llvm/test/CodeGen/X86/pr38795.ll
+++ b/llvm/test/CodeGen/X86/pr38795.ll
@@ -32,13 +32,14 @@ define dso_local void @fn() {
; CHECK-NEXT: # implicit-def: $ebp
; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_16: # %for.inc
+; CHECK-NEXT: .LBB0_15: # %for.inc
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: movb %dh, %dl
; CHECK-NEXT: .LBB0_1: # %for.cond
; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB0_20 Depth 2
+; CHECK-NEXT: # Child Loop BB0_19 Depth 2
; CHECK-NEXT: cmpb $8, %dl
; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: ja .LBB0_3
@@ -55,7 +56,7 @@ define dso_local void @fn() {
; CHECK-NEXT: movb %cl, %dh
; CHECK-NEXT: movl $0, h
; CHECK-NEXT: cmpb $8, %dl
-; CHECK-NEXT: jg .LBB0_8
+; CHECK-NEXT: jg .LBB0_9
; CHECK-NEXT: # %bb.5: # %if.then13
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl %eax, %esi
@@ -64,12 +65,10 @@ define dso_local void @fn() {
; CHECK-NEXT: calll printf
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: movl %esi, %ecx
; CHECK-NEXT: # implicit-def: $eax
-; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
-; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
; CHECK-NEXT: movb %dh, %dl
-; CHECK-NEXT: jne .LBB0_16
+; CHECK-NEXT: jne .LBB0_15
; CHECK-NEXT: jmp .LBB0_6
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_3: # %if.then
@@ -78,82 +77,82 @@ define dso_local void @fn() {
; CHECK-NEXT: calll printf
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
; CHECK-NEXT: # implicit-def: $eax
+; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
+; CHECK-NEXT: jmp .LBB0_6
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_9: # %if.end21
+; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: # implicit-def: $ebp
+; CHECK-NEXT: jmp .LBB0_10
+; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_6: # %for.cond35
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movb %dl, %dh
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: je .LBB0_7
-; CHECK-NEXT: .LBB0_11: # %af
+; CHECK-NEXT: movl %edi, %esi
+; CHECK-NEXT: movl $0, %edi
+; CHECK-NEXT: movb %cl, %dl
+; CHECK-NEXT: je .LBB0_19
+; CHECK-NEXT: # %bb.7: # %af
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: jne .LBB0_12
-; CHECK-NEXT: .LBB0_17: # %if.end39
+; CHECK-NEXT: jne .LBB0_8
+; CHECK-NEXT: .LBB0_16: # %if.end39
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testl %eax, %eax
-; CHECK-NEXT: je .LBB0_19
-; CHECK-NEXT: # %bb.18: # %if.then41
+; CHECK-NEXT: je .LBB0_18
+; CHECK-NEXT: # %bb.17: # %if.then41
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $.str, (%esp)
; CHECK-NEXT: calll printf
-; CHECK-NEXT: .LBB0_19: # %for.end46
+; CHECK-NEXT: .LBB0_18: # %for.end46
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movl %esi, %edi
; CHECK-NEXT: # implicit-def: $dl
; CHECK-NEXT: # implicit-def: $dh
; CHECK-NEXT: # implicit-def: $ebp
-; CHECK-NEXT: jmp .LBB0_20
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_8: # %if.end21
-; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: # implicit-def: $ebp
-; CHECK-NEXT: jmp .LBB0_9
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: xorl %edi, %edi
-; CHECK-NEXT: movb %dl, %dh
-; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
-; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_20: # %for.cond47
+; CHECK-NEXT: .LBB0_19: # %for.cond47
; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: jne .LBB0_20
-; CHECK-NEXT: # %bb.21: # %for.cond47
-; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2
+; CHECK-NEXT: jne .LBB0_19
+; CHECK-NEXT: # %bb.20: # %for.cond47
+; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: jne .LBB0_20
-; CHECK-NEXT: .LBB0_9: # %ae
+; CHECK-NEXT: jne .LBB0_19
+; CHECK-NEXT: .LBB0_10: # %ae
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: jne .LBB0_10
-; CHECK-NEXT: # %bb.13: # %if.end26
+; CHECK-NEXT: jne .LBB0_11
+; CHECK-NEXT: # %bb.12: # %if.end26
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: testb %dl, %dl
-; CHECK-NEXT: je .LBB0_16
-; CHECK-NEXT: # %bb.14: # %if.end26
+; CHECK-NEXT: je .LBB0_15
+; CHECK-NEXT: # %bb.13: # %if.end26
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: testl %ebp, %ebp
-; CHECK-NEXT: jne .LBB0_16
-; CHECK-NEXT: # %bb.15: # %if.then31
+; CHECK-NEXT: jne .LBB0_15
+; CHECK-NEXT: # %bb.14: # %if.then31
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: xorl %esi, %esi
; CHECK-NEXT: xorl %ebp, %ebp
-; CHECK-NEXT: jmp .LBB0_16
+; CHECK-NEXT: jmp .LBB0_15
; CHECK-NEXT: .p2align 4, 0x90
-; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: movl %edi, %esi
; CHECK-NEXT: # implicit-def: $eax
; CHECK-NEXT: testb %bl, %bl
-; CHECK-NEXT: je .LBB0_17
-; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: je .LBB0_16
+; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: # implicit-def: $edi
; CHECK-NEXT: # implicit-def: $cl
-; CHECK-NEXT: # kill: killed $cl
; CHECK-NEXT: # implicit-def: $dl
; CHECK-NEXT: # implicit-def: $ebp
-; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: jne .LBB0_11
-; CHECK-NEXT: jmp .LBB0_7
+; CHECK-NEXT: jmp .LBB0_6
entry:
br label %for.cond
diff --git a/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll b/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll
index 3d7ff6cbe676..4f56d7b16a87 100644
--- a/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll
+++ b/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll
@@ -7,18 +7,15 @@ define void @test(i1 %c, i64* %p, i64* noalias %p2) nounwind {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
-; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movq %rdx, %rbx
-; CHECK-NEXT: movq %rsi, %r14
-; CHECK-NEXT: movl %edi, %r15d
+; CHECK-NEXT: movl %edi, %r14d
+; CHECK-NEXT: movq (%rsi), %rbp
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8)
; CHECK-NEXT: .LBB0_1: # %split.3
-; CHECK-NEXT: movq (%r14), %rbp
-; CHECK-NEXT: testb $1, %r15b
+; CHECK-NEXT: testb $1, %r14b
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.2: # %clobber
; CHECK-NEXT: callq clobber@PLT
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index 0f8bb837f82a..b44895293b41 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -1377,6 +1377,8 @@ define i32 @irreducibleCFG() #4 {
; ENABLE-NEXT: pushq %rbx
; ENABLE-NEXT: pushq %rax
; ENABLE-NEXT: .cfi_offset %rbx, -24
+; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax
+; ENABLE-NEXT: movl (%rax), %edi
; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax
; ENABLE-NEXT: cmpb $0, (%rax)
; ENABLE-NEXT: je LBB16_2
@@ -1386,24 +1388,20 @@ define i32 @irreducibleCFG() #4 {
; ENABLE-NEXT: jmp LBB16_1
; ENABLE-NEXT: LBB16_2: ## %split
; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax
+; ENABLE-NEXT: xorl %ebx, %ebx
; ENABLE-NEXT: cmpl $0, (%rax)
-; ENABLE-NEXT: je LBB16_3
-; ENABLE-NEXT: ## %bb.4: ## %for.body4.i
-; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax
-; ENABLE-NEXT: movl (%rax), %edi
+; ENABLE-NEXT: je LBB16_4
+; ENABLE-NEXT: ## %bb.3: ## %for.body4.i
; ENABLE-NEXT: xorl %ebx, %ebx
; ENABLE-NEXT: xorl %eax, %eax
; ENABLE-NEXT: callq _something
-; ENABLE-NEXT: jmp LBB16_5
-; ENABLE-NEXT: LBB16_3:
-; ENABLE-NEXT: xorl %ebx, %ebx
; ENABLE-NEXT: .p2align 4, 0x90
-; ENABLE-NEXT: LBB16_5: ## %for.inc
+; ENABLE-NEXT: LBB16_4: ## %for.inc
; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1
; ENABLE-NEXT: incl %ebx
; ENABLE-NEXT: cmpl $7, %ebx
-; ENABLE-NEXT: jl LBB16_5
-; ENABLE-NEXT: ## %bb.6: ## %fn1.exit
+; ENABLE-NEXT: jl LBB16_4
+; ENABLE-NEXT: ## %bb.5: ## %fn1.exit
; ENABLE-NEXT: xorl %eax, %eax
; ENABLE-NEXT: addq $8, %rsp
; ENABLE-NEXT: popq %rbx
@@ -1420,6 +1418,8 @@ define i32 @irreducibleCFG() #4 {
; DISABLE-NEXT: pushq %rbx
; DISABLE-NEXT: pushq %rax
; DISABLE-NEXT: .cfi_offset %rbx, -24
+; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax
+; DISABLE-NEXT: movl (%rax), %edi
; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax
; DISABLE-NEXT: cmpb $0, (%rax)
; DISABLE-NEXT: je LBB16_2
@@ -1429,24 +1429,20 @@ define i32 @irreducibleCFG() #4 {
; DISABLE-NEXT: jmp LBB16_1
; DISABLE-NEXT: LBB16_2: ## %split
; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax
+; DISABLE-NEXT: xorl %ebx, %ebx
; DISABLE-NEXT: cmpl $0, (%rax)
-; DISABLE-NEXT: je LBB16_3
-; DISABLE-NEXT: ## %bb.4: ## %for.body4.i
-; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax
-; DISABLE-NEXT: movl (%rax), %edi
+; DISABLE-NEXT: je LBB16_4
+; DISABLE-NEXT: ## %bb.3: ## %for.body4.i
; DISABLE-NEXT: xorl %ebx, %ebx
; DISABLE-NEXT: xorl %eax, %eax
; DISABLE-NEXT: callq _something
-; DISABLE-NEXT: jmp LBB16_5
-; DISABLE-NEXT: LBB16_3:
-; DISABLE-NEXT: xorl %ebx, %ebx
; DISABLE-NEXT: .p2align 4, 0x90
-; DISABLE-NEXT: LBB16_5: ## %for.inc
+; DISABLE-NEXT: LBB16_4: ## %for.inc
; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1
; DISABLE-NEXT: incl %ebx
; DISABLE-NEXT: cmpl $7, %ebx
-; DISABLE-NEXT: jl LBB16_5
-; DISABLE-NEXT: ## %bb.6: ## %fn1.exit
+; DISABLE-NEXT: jl LBB16_4
+; DISABLE-NEXT: ## %bb.5: ## %fn1.exit
; DISABLE-NEXT: xorl %eax, %eax
; DISABLE-NEXT: addq $8, %rsp
; DISABLE-NEXT: popq %rbx