diff options
author | Craig Topper <craig.topper@sifive.com> | 2022-02-03 09:26:34 -0800 |
---|---|---|
committer | Craig Topper <craig.topper@sifive.com> | 2022-02-03 09:40:47 -0800 |
commit | 237eb37260e9091655e2c771155aab9a19cafad7 (patch) | |
tree | a12981cbccbc89c36aaf25c0879307ec737dc090 | |
parent | d3b87e4a1c5a433f13a6c694107659039a28b63f (diff) |
[RISCV] Add FMV_X_W and FMV_X_H to RISCVSExtWRemoval.
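On RV64, FMV_X_W and FMV_X_H sign-extend the moved value into the
destination GPR, so isSignExtendingOpW can treat them as producing a
sign-extended result and a following sext.w can be removed.

Add -target-abi=lp64f to the sextw-removal.ll RUN lines to show the
benefit on the new test case (test10).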
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/sextw-removal.ll | 82 |
2 files changed, 69 insertions, 15 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp index 715d92b036e3..e4695fb2eaa7 100644 --- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp +++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp @@ -89,6 +89,7 @@ static bool isSignExtendingOpW(const MachineInstr &MI) { case RISCV::FCVT_WU_S: case RISCV::FCVT_W_D: case RISCV::FCVT_WU_D: + case RISCV::FMV_X_W: // The following aren't W instructions, but are either sign extended from a // smaller size or put zeros in bits 63:31. case RISCV::LBU: @@ -102,6 +103,7 @@ static bool isSignExtendingOpW(const MachineInstr &MI) { case RISCV::SEXT_B: case RISCV::SEXT_H: case RISCV::ZEXT_H_RV64: + case RISCV::FMV_X_H: return true; // shifting right sufficiently makes the value 32-bit sign-extended case RISCV::SRAI: diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll index c580d26695e0..67c9aa54d9db 100644 --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f -target-abi=lp64f \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64I -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+f \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+f -target-abi=lp64f \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64ZBB -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+f \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb,+f -target-abi=lp64f \ ; RUN: -riscv-disable-sextw-removal | FileCheck %s --check-prefix=NOREMOVAL define void @test1(i32 signext %arg, i32 signext %arg1) nounwind { @@ -265,19 +265,18 @@ define void @test6(i32 signext %arg, i32 signext %arg1) nounwind { ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; 
CHECK-NEXT: sraw a0, a0, a1 -; CHECK-NEXT: fmv.w.x ft0, zero -; CHECK-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill +; CHECK-NEXT: fmv.w.x fs0, zero ; CHECK-NEXT: .LBB5_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: call baz@plt -; CHECK-NEXT: fmv.w.x ft0, a0 -; CHECK-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload -; CHECK-NEXT: feq.s a1, ft0, ft1 -; CHECK-NEXT: fcvt.w.s a0, ft0, rtz +; CHECK-NEXT: feq.s a1, fa0, fs0 +; CHECK-NEXT: fcvt.w.s a0, fa0, rtz ; CHECK-NEXT: beqz a1, .LBB5_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; @@ -285,20 +284,19 @@ define void @test6(i32 signext %arg, i32 signext %arg1) nounwind { ; NOREMOVAL: # %bb.0: # %bb ; NOREMOVAL-NEXT: addi sp, sp, -16 ; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; NOREMOVAL-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill ; NOREMOVAL-NEXT: sraw a0, a0, a1 -; NOREMOVAL-NEXT: fmv.w.x ft0, zero -; NOREMOVAL-NEXT: fsw ft0, 4(sp) # 4-byte Folded Spill +; NOREMOVAL-NEXT: fmv.w.x fs0, zero ; NOREMOVAL-NEXT: .LBB5_1: # %bb2 ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: sext.w a0, a0 ; NOREMOVAL-NEXT: call baz@plt -; NOREMOVAL-NEXT: fmv.w.x ft0, a0 -; NOREMOVAL-NEXT: flw ft1, 4(sp) # 4-byte Folded Reload -; NOREMOVAL-NEXT: feq.s a1, ft0, ft1 -; NOREMOVAL-NEXT: fcvt.w.s a0, ft0, rtz +; NOREMOVAL-NEXT: feq.s a1, fa0, fs0 +; NOREMOVAL-NEXT: fcvt.w.s a0, fa0, rtz ; NOREMOVAL-NEXT: beqz a1, .LBB5_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; NOREMOVAL-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload ; NOREMOVAL-NEXT: addi sp, sp, 16 ; NOREMOVAL-NEXT: ret bb: @@ -419,3 +417,57 @@ bb2: ; preds = %bb2, %bb bb7: ; preds = %bb2 ret void } + +define void @test10(i32 signext %arg, i32 signext %arg1) nounwind { +; CHECK-LABEL: test10: +; CHECK: # %bb.0: # %bb +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; CHECK-NEXT: sraw a0, a0, a1 +; CHECK-NEXT: fmv.w.x fs0, zero +; CHECK-NEXT: .LBB8_1: # %bb2 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: call baz@plt +; CHECK-NEXT: feq.s a1, fa0, fs0 +; CHECK-NEXT: fmv.x.w a0, fa0 +; CHECK-NEXT: beqz a1, .LBB8_1 +; CHECK-NEXT: # %bb.2: # %bb7 +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +; +; NOREMOVAL-LABEL: test10: +; NOREMOVAL: # %bb.0: # %bb +; NOREMOVAL-NEXT: addi sp, sp, -16 +; NOREMOVAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; NOREMOVAL-NEXT: fsw fs0, 4(sp) # 4-byte Folded Spill +; NOREMOVAL-NEXT: sraw a0, a0, a1 +; NOREMOVAL-NEXT: fmv.w.x fs0, zero +; NOREMOVAL-NEXT: .LBB8_1: # %bb2 +; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 +; NOREMOVAL-NEXT: sext.w a0, a0 +; NOREMOVAL-NEXT: call baz@plt +; NOREMOVAL-NEXT: feq.s a1, fa0, fs0 +; NOREMOVAL-NEXT: fmv.x.w a0, fa0 +; NOREMOVAL-NEXT: beqz a1, .LBB8_1 +; NOREMOVAL-NEXT: # %bb.2: # %bb7 +; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; NOREMOVAL-NEXT: flw fs0, 4(sp) # 4-byte Folded Reload +; NOREMOVAL-NEXT: addi sp, sp, 16 +; NOREMOVAL-NEXT: ret +bb: + %i = ashr i32 %arg, %arg1 + br label %bb2 + +bb2: ; preds = %bb2, %bb + %i3 = phi i32 [ %i, %bb ], [ %i5, %bb2 ] + %i4 = tail call float @baz(i32 signext %i3) + %i5 = bitcast float %i4 to i32 + %i6 = fcmp oeq float %i4, zeroinitializer + br i1 %i6, label %bb7, label %bb2 + +bb7: ; preds = %bb2 + ret void +} |