diff options
author | Nikita Popov <npopov@redhat.com> | 2022-02-24 10:09:49 +0100 |
---|---|---|
committer | Tom Stellard <tstellar@redhat.com> | 2022-03-07 21:02:28 -0800 |
commit | 67555104d23aaef9b4ce4995ccb98b2ba9aff07d (patch) | |
tree | 4df088de19c1adbf84b4dd3ca40d003eb3755708 | |
parent | 1e4fd59253c60c78a222f85c42501302142a3586 (diff) |
[MachineSink] Disable if there are any irreducible cycles
This is an alternative to D120330, which disables MachineSink for
functions with irreducible cycles entirely. This avoids both the
correctness problem, and ensures we don't perform non-profitable
sinks into cycles. At the same time, it may also disable
profitable sinks in the same function. This can be made more
precise by using MachineCycleInfo in the future.
Fixes https://github.com/llvm/llvm-project/issues/53990.
Differential Revision: https://reviews.llvm.org/D120800
(cherry picked from commit 6fde0439512580df793f3f48f95757b47de40d2b)
-rw-r--r-- | llvm/lib/CodeGen/MachineSink.cpp | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll | 22 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr38795.ll | 93 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 36 |
5 files changed, 87 insertions, 85 deletions
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 0dbbc218e946..bc03776bde19 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -18,12 +18,14 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -429,6 +431,16 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); RegClassInfo.runOnMachineFunction(MF); + // MachineSink currently uses MachineLoopInfo, which only recognizes natural + // loops. As such, we could sink instructions into irreducible cycles, which + // would be non-profitable. + // WARNING: The current implementation of hasStoreBetween() is incorrect for + // sinking into irreducible cycles (PR53990), this bailout is currently + // necessary for correctness, not just profitability. + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI)) + return false; + bool EverMadeChange = false; while (true) { diff --git a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll index 024b6c608aba..f93e181d157c 100644 --- a/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll @@ -24,7 +24,7 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind { ; CHECK-NEXT: movq %r15, %rdi ; CHECK-NEXT: callq l ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: jne .LBB0_9 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: cmpl $0, e(%rip) @@ -44,21 +44,19 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind { ; CHECK-NEXT: callq i ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: orl %r14d, %ebp -; CHECK-NEXT: testl %r13d, %r13d -; CHECK-NEXT: je .LBB0_6 -; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: andl $4, %ebx -; CHECK-NEXT: jmp .LBB0_3 -; CHECK-NEXT: .LBB0_6: # %if.end12 +; CHECK-NEXT: testl %r13d, %r13d +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: # %bb.5: # %if.end12 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: je .LBB0_9 -; CHECK-NEXT: # %bb.7: # %if.then14 +; CHECK-NEXT: je .LBB0_8 +; CHECK-NEXT: # %bb.6: # %if.then14 ; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: jmp .LBB0_10 +; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .Ltmp0: # Block address taken -; CHECK-NEXT: # %bb.8: # %if.then20.critedge +; CHECK-NEXT: # %bb.7: # %if.then20.critedge ; CHECK-NEXT: movl j(%rip), %edi ; CHECK-NEXT: movslq %eax, %rcx ; CHECK-NEXT: movl $1, %esi @@ -71,9 +69,9 @@ define dso_local void @n(i32* %o, i32 %p, i32 %u) nounwind { ; CHECK-NEXT: popq %r15 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: jmp k # TAILCALL -; CHECK-NEXT: .LBB0_9: # %if.else +; CHECK-NEXT: .LBB0_8: # %if.else ; CHECK-NEXT: incq 0 -; CHECK-NEXT: .LBB0_10: # %cleanup +; CHECK-NEXT: .LBB0_9: # %cleanup ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index d805dcad8b6e..b526e4f471b1 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -32,13 +32,14 @@ define dso_local void @fn() { ; CHECK-NEXT: # implicit-def: $ebp ; CHECK-NEXT: jmp .LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_16: # %for.inc +; CHECK-NEXT: .LBB0_15: # %for.inc ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movb %dh, %dl ; CHECK-NEXT: .LBB0_1: # %for.cond ; CHECK-NEXT: # =>This Loop Header: Depth=1 -; CHECK-NEXT: # Child Loop BB0_20 Depth 2 +; CHECK-NEXT: # Child Loop BB0_19 Depth 2 ; CHECK-NEXT: cmpb $8, %dl ; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: ja .LBB0_3 @@ -55,7 +56,7 @@ define dso_local void @fn() { ; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h ; CHECK-NEXT: cmpb $8, %dl -; CHECK-NEXT: jg .LBB0_8 +; CHECK-NEXT: jg .LBB0_9 ; CHECK-NEXT: # %bb.5: # %if.then13 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl %eax, %esi @@ -64,12 +65,10 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: # implicit-def: $eax -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload ; CHECK-NEXT: movb %dh, %dl -; CHECK-NEXT: jne .LBB0_16 +; CHECK-NEXT: jne .LBB0_15 ; CHECK-NEXT: jmp .LBB0_6 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %if.then @@ -78,82 +77,82 @@ define dso_local void @fn() { ; CHECK-NEXT: calll printf ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: jmp .LBB0_6 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_9: # %if.end21 +; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: # implicit-def: $ebp +; CHECK-NEXT: jmp .LBB0_10 +; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movb %dl, %dh ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_7 -; CHECK-NEXT: .LBB0_11: # %af +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: movl $0, %edi +; CHECK-NEXT: movb %cl, %dl +; CHECK-NEXT: je .LBB0_19 +; CHECK-NEXT: # %bb.7: # %af ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_12 -; CHECK-NEXT: .LBB0_17: # %if.end39 +; CHECK-NEXT: jne .LBB0_8 +; CHECK-NEXT: .LBB0_16: # %if.end39 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je .LBB0_19 -; CHECK-NEXT: # %bb.18: # %if.then41 +; CHECK-NEXT: je .LBB0_18 +; CHECK-NEXT: # %bb.17: # %if.then41 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $fn, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: .LBB0_19: # %for.end46 +; CHECK-NEXT: .LBB0_18: # %for.end46 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %esi, %edi ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $dh ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_20 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_8: # %if.end21 -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: jmp .LBB0_9 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: movb %dl, %dh -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_20: # %for.cond47 +; CHECK-NEXT: .LBB0_19: # %for.cond47 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: # %bb.21: # %for.cond47 -; CHECK-NEXT: # in Loop: Header=BB0_20 Depth=2 +; CHECK-NEXT: jne .LBB0_19 +; CHECK-NEXT: # %bb.20: # %for.cond47 +; CHECK-NEXT: # in Loop: Header=BB0_19 Depth=2 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_20 -; CHECK-NEXT: .LBB0_9: # %ae +; CHECK-NEXT: jne .LBB0_19 +; CHECK-NEXT: .LBB0_10: # %ae ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.13: # %if.end26 +; CHECK-NEXT: jne .LBB0_11 +; CHECK-NEXT: # %bb.12: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je .LBB0_16 -; CHECK-NEXT: # %bb.14: # %if.end26 +; CHECK-NEXT: je .LBB0_15 +; CHECK-NEXT: # %bb.13: # %if.end26 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_16 -; CHECK-NEXT: # %bb.15: # %if.then31 +; CHECK-NEXT: jne .LBB0_15 +; CHECK-NEXT: # %bb.14: # %if.then31 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: xorl %ebp, %ebp -; CHECK-NEXT: jmp .LBB0_16 +; CHECK-NEXT: jmp .LBB0_15 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_10: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: .LBB0_11: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: movl %edi, %esi ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: testb %bl, %bl -; CHECK-NEXT: je .LBB0_17 -; CHECK-NEXT: .LBB0_12: # in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: je .LBB0_16 +; CHECK-NEXT: .LBB0_8: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: # implicit-def: $edi ; CHECK-NEXT: # implicit-def: $cl -; CHECK-NEXT: # kill: killed $cl ; CHECK-NEXT: # implicit-def: $dl ; CHECK-NEXT: # implicit-def: $ebp -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: jne .LBB0_11 -; CHECK-NEXT: jmp .LBB0_7 +; CHECK-NEXT: jmp .LBB0_6 entry: br label %for.cond diff --git a/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll b/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll index 3d7ff6cbe676..4f56d7b16a87 100644 --- a/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll +++ b/llvm/test/CodeGen/X86/pr53990-incorrect-machine-sink.ll @@ -7,18 +7,15 @@ define void @test(i1 %c, i64* %p, i64* noalias %p2) nounwind { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: pushq %r15 ; CHECK-NEXT: pushq %r14 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rsi, %r14 -; CHECK-NEXT: movl %edi, %r15d +; CHECK-NEXT: movl %edi, %r14d +; CHECK-NEXT: movq (%rsi), %rbp ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: jmpq *.LJTI0_0(,%rax,8) ; CHECK-NEXT: .LBB0_1: # %split.3 -; CHECK-NEXT: movq (%r14), %rbp -; CHECK-NEXT: testb $1, %r15b +; CHECK-NEXT: testb $1, %r14b ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.2: # %clobber ; CHECK-NEXT: callq clobber@PLT diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll index 0f8bb837f82a..b44895293b41 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -1377,6 +1377,8 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: pushq %rbx ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_offset %rbx, -24 +; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; ENABLE-NEXT: movl (%rax), %edi ; ENABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; ENABLE-NEXT: cmpb $0, (%rax) ; ENABLE-NEXT: je LBB16_2 @@ -1386,24 +1388,20 @@ define i32 @irreducibleCFG() #4 { ; ENABLE-NEXT: jmp LBB16_1 ; ENABLE-NEXT: LBB16_2: ## %split ; ENABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax +; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: cmpl $0, (%rax) -; ENABLE-NEXT: je LBB16_3 -; ENABLE-NEXT: ## %bb.4: ## %for.body4.i -; ENABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; ENABLE-NEXT: movl (%rax), %edi +; ENABLE-NEXT: je LBB16_4 +; ENABLE-NEXT: ## %bb.3: ## %for.body4.i ; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: callq _something -; ENABLE-NEXT: jmp LBB16_5 -; ENABLE-NEXT: LBB16_3: -; ENABLE-NEXT: xorl %ebx, %ebx ; ENABLE-NEXT: .p2align 4, 0x90 -; ENABLE-NEXT: LBB16_5: ## %for.inc +; ENABLE-NEXT: LBB16_4: ## %for.inc ; ENABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: incl %ebx ; ENABLE-NEXT: cmpl $7, %ebx -; ENABLE-NEXT: jl LBB16_5 -; ENABLE-NEXT: ## %bb.6: ## %fn1.exit +; ENABLE-NEXT: jl LBB16_4 +; ENABLE-NEXT: ## %bb.5: ## %fn1.exit ; ENABLE-NEXT: xorl %eax, %eax ; ENABLE-NEXT: addq $8, %rsp ; ENABLE-NEXT: popq %rbx @@ -1420,6 +1418,8 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: pushq %rbx ; DISABLE-NEXT: pushq %rax ; DISABLE-NEXT: .cfi_offset %rbx, -24 +; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax +; DISABLE-NEXT: movl (%rax), %edi ; DISABLE-NEXT: movq _irreducibleCFGf@GOTPCREL(%rip), %rax ; DISABLE-NEXT: cmpb $0, (%rax) ; DISABLE-NEXT: je LBB16_2 @@ -1429,24 +1429,20 @@ define i32 @irreducibleCFG() #4 { ; DISABLE-NEXT: jmp LBB16_1 ; DISABLE-NEXT: LBB16_2: ## %split ; DISABLE-NEXT: movq _irreducibleCFGb@GOTPCREL(%rip), %rax +; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: cmpl $0, (%rax) -; DISABLE-NEXT: je LBB16_3 -; DISABLE-NEXT: ## %bb.4: ## %for.body4.i -; DISABLE-NEXT: movq _irreducibleCFGa@GOTPCREL(%rip), %rax -; DISABLE-NEXT: movl (%rax), %edi +; DISABLE-NEXT: je LBB16_4 +; DISABLE-NEXT: ## %bb.3: ## %for.body4.i ; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: callq _something -; DISABLE-NEXT: jmp LBB16_5 -; DISABLE-NEXT: LBB16_3: -; DISABLE-NEXT: xorl %ebx, %ebx ; DISABLE-NEXT: .p2align 4, 0x90 -; DISABLE-NEXT: LBB16_5: ## %for.inc +; DISABLE-NEXT: LBB16_4: ## %for.inc ; DISABLE-NEXT: ## =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: incl %ebx ; DISABLE-NEXT: cmpl $7, %ebx -; DISABLE-NEXT: jl LBB16_5 -; DISABLE-NEXT: ## %bb.6: ## %fn1.exit +; DISABLE-NEXT: jl LBB16_4 +; DISABLE-NEXT: ## %bb.5: ## %fn1.exit ; DISABLE-NEXT: xorl %eax, %eax ; DISABLE-NEXT: addq $8, %rsp ; DISABLE-NEXT: popq %rbx |