summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@sifive.com> 2022-02-03 09:26:34 -0800
committer: Craig Topper <craig.topper@sifive.com> 2022-02-03 09:40:47 -0800
commit: 237eb37260e9091655e2c771155aab9a19cafad7 (patch)
tree: a12981cbccbc89c36aaf25c0879307ec737dc090
parent: d3b87e4a1c5a433f13a6c694107659039a28b63f (diff)
[RISCV] Add FMV_X_W and FMV_X_H to RISCVSExtWRemoval.
Add -target-abi to sextw-removal.ll RUN lines to show benefit on new test case.
-rw-r--r-- llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp 2
-rw-r--r-- llvm/test/CodeGen/RISCV/sextw-removal.ll 82
2 files changed, 69 insertions, 15 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
index 715d92b036e3..e4695fb2eaa7 100644
--- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
@@ -89,6 +89,7 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
case RISCV::FCVT_WU_S:
case RISCV::FCVT_W_D:
case RISCV::FCVT_WU_D:
+ case RISCV::FMV_X_W:
// The following aren't W instructions, but are either sign extended from a
// smaller size or put zeros in bits 63:31.
case RISCV::LBU:
@@ -102,6 +103,7 @@ static bool isSignExtendingOpW(const MachineInstr &MI) {
case RISCV::SEXT_B:
case RISCV::SEXT_H:
case RISCV::ZEXT_H_RV64:
+ case RISCV::FMV_X_H:
return true;
// shifting right sufficiently makes the value 32-bit sign-extended
case RISCV::SRAI:
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index c580d26695e0..67c9aa54d9db 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f -target-abi=lp64f \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64I
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+f -target-abi=lp64f \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64ZBB
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+f -target-abi=lp64f \
; RUN: -riscv-disable-sextw-removal | FileCheck %s --check-prefix=NOREMOVAL
define void @test1(i32 signext %arg, i32 signext %arg1) nounwind {
@@ -265,19 +265,18 @@ define void @test6(i32 signext %arg, i32 signext %arg1) nounwind {
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; CHECK-NEXT: sraw a0, a0, a1
-; CHECK-NEXT: fmv.w.x ft0, zero
-; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill
+; CHECK-NEXT: fmv.w.x fs0, zero
; CHECK-NEXT: .LBB5_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: call baz@plt
-; CHECK-NEXT: fmv.w.x ft0, a0
-; CHECK-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload
-; CHECK-NEXT: feq.s a1, ft0, ft1
-; CHECK-NEXT: fcvt.w.s a0, ft0, rtz
+; CHECK-NEXT: feq.s a1, fa0, fs0
+; CHECK-NEXT: fcvt.w.s a0, fa0, rtz
; CHECK-NEXT: beqz a1, .LBB5_1
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
;
@@ -285,20 +284,19 @@ define void @test6(i32 signext %arg, i32 signext %arg1) nounwind {
; NOREMOVAL: # %bb.0: # %bb
; NOREMOVAL-NEXT: addi sp, sp, -16
; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; NOREMOVAL-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
; NOREMOVAL-NEXT: sraw a0, a0, a1
-; NOREMOVAL-NEXT: fmv.w.x ft0, zero
-; NOREMOVAL-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill
+; NOREMOVAL-NEXT: fmv.w.x fs0, zero
; NOREMOVAL-NEXT: .LBB5_1: # %bb2
; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
; NOREMOVAL-NEXT: sext.w a0, a0
; NOREMOVAL-NEXT: call baz@plt
-; NOREMOVAL-NEXT: fmv.w.x ft0, a0
-; NOREMOVAL-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload
-; NOREMOVAL-NEXT: feq.s a1, ft0, ft1
-; NOREMOVAL-NEXT: fcvt.w.s a0, ft0, rtz
+; NOREMOVAL-NEXT: feq.s a1, fa0, fs0
+; NOREMOVAL-NEXT: fcvt.w.s a0, fa0, rtz
; NOREMOVAL-NEXT: beqz a1, .LBB5_1
; NOREMOVAL-NEXT: # %bb.2: # %bb7
; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; NOREMOVAL-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
; NOREMOVAL-NEXT: addi sp, sp, 16
; NOREMOVAL-NEXT: ret
bb:
@@ -419,3 +417,57 @@ bb2: ; preds = %bb2, %bb
bb7: ; preds = %bb2
ret void
}
+
+define void @test10(i32 signext %arg, i32 signext %arg1) nounwind {
+; CHECK-LABEL: test10:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; CHECK-NEXT: sraw a0, a0, a1
+; CHECK-NEXT: fmv.w.x fs0, zero
+; CHECK-NEXT: .LBB8_1: # %bb2
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: call baz@plt
+; CHECK-NEXT: feq.s a1, fa0, fs0
+; CHECK-NEXT: fmv.x.w a0, fa0
+; CHECK-NEXT: beqz a1, .LBB8_1
+; CHECK-NEXT: # %bb.2: # %bb7
+; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+;
+; NOREMOVAL-LABEL: test10:
+; NOREMOVAL: # %bb.0: # %bb
+; NOREMOVAL-NEXT: addi sp, sp, -16
+; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; NOREMOVAL-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill
+; NOREMOVAL-NEXT: sraw a0, a0, a1
+; NOREMOVAL-NEXT: fmv.w.x fs0, zero
+; NOREMOVAL-NEXT: .LBB8_1: # %bb2
+; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT: sext.w a0, a0
+; NOREMOVAL-NEXT: call baz@plt
+; NOREMOVAL-NEXT: feq.s a1, fa0, fs0
+; NOREMOVAL-NEXT: fmv.x.w a0, fa0
+; NOREMOVAL-NEXT: beqz a1, .LBB8_1
+; NOREMOVAL-NEXT: # %bb.2: # %bb7
+; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; NOREMOVAL-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload
+; NOREMOVAL-NEXT: addi sp, sp, 16
+; NOREMOVAL-NEXT: ret
+bb:
+ %i = ashr i32 %arg, %arg1
+ br label %bb2
+
+bb2: ; preds = %bb2, %bb
+ %i3 = phi i32 [ %i, %bb ], [ %i5, %bb2 ]
+ %i4 = tail call float @baz(i32 signext %i3)
+ %i5 = bitcast float %i4 to i32
+ %i6 = fcmp oeq float %i4, zeroinitializer
+ br i1 %i6, label %bb7, label %bb2
+
+bb7: ; preds = %bb2
+ ret void
+}