diff options
author | Qiu Chaofan <qiucofan@cn.ibm.com> | 2022-01-22 23:20:14 +0800 |
---|---|---|
committer | Qiu Chaofan <qiucofan@cn.ibm.com> | 2022-01-22 23:20:14 +0800 |
commit | 8dedf9b58bff3589bff8cb422e449c4ee7f11499 (patch) | |
tree | 2c3330fb48d4dbd72de3667b03cc1afe5d116ee0 | |
parent | b27e5459d51fd5ba80a1182e5bd8c0fd5e2e6a49 (diff) |
[PowerPC] Change CTR clobber estimation for 128-bit floating types
Reviewed By: shchenz
Differential Revision: https://reviews.llvm.org/D117459
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll | 52 |
2 files changed, 58 insertions, 4 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index ed28731b8ef2..707c1396e572 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -653,11 +653,17 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
       }
 
       return true;
-    } else if (isa<BinaryOperator>(J) &&
-               (J->getType()->getScalarType()->isFP128Ty() ||
+    } else if ((J->getType()->getScalarType()->isFP128Ty() ||
                 J->getType()->getScalarType()->isPPC_FP128Ty())) {
       // Most operations on f128 or ppc_f128 values become calls.
       return true;
+    } else if (isa<FCmpInst>(J) &&
+               J->getOperand(0)->getType()->getScalarType()->isFP128Ty()) {
+      return true;
+    } else if ((isa<FPTruncInst>(J) || isa<FPExtInst>(J)) &&
+               (cast<CastInst>(J)->getSrcTy()->getScalarType()->isFP128Ty() ||
+                cast<CastInst>(J)->getDestTy()->getScalarType()->isFP128Ty())) {
+      return true;
     } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
                isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
       CastInst *CI = cast<CastInst>(J);
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
index 57095413cdb2..fde8e20212c0 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
@@ -1,5 +1,7 @@
-; RUN: llc -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr9 \
-; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr9 \
+; RUN:   -mtriple=powerpc64le-unknown-unknown | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr8 \
+; RUN:   -mtriple=powerpc64le-unknown-unknown | FileCheck %s
 
 @a = internal global fp128 0xL00000000000000000000000000000000, align 16
 @x = internal global [4 x fp128] zeroinitializer, align 16
@@ -29,4 +31,50 @@ for.end: ; preds = %for.body
 ; CHECK-NOT: call i1 @llvm.loop.decrement.i64(i64 1)
 }
 
+define void @fpext_ctrloop_fp128(double* %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds double, double* %a, i64 %i.06
+  %0 = load double, double* %arrayidx, align 8
+  %ext = fpext double %0 to fp128
+  %arrayidx1 = getelementptr inbounds [4 x fp128], [4 x fp128]* @y, i64 0, i64 %i.06
+  store fp128 %ext, fp128* %arrayidx1, align 16
+  %inc = add nuw nsw i64 %i.06, 1
+  %exitcond = icmp eq i64 %inc, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+
+; CHECK-LABEL: fpext_ctrloop_fp128
+; CHECK-NOT: call void @llvm.set.loop.iterations.i64(i64 4)
+; CHECK-NOT: call i1 @llvm.loop.decrement.i64(i64 1)
+}
+
+define void @fptrunc_ctrloop_fp128(double* %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [4 x fp128], [4 x fp128]* @x, i64 0, i64 %i.06
+  %0 = load fp128, fp128* %arrayidx, align 16
+  %trunc = fptrunc fp128 %0 to double
+  %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.06
+  store double %trunc, double* %arrayidx1, align 16
+  %inc = add nuw nsw i64 %i.06, 1
+  %exitcond = icmp eq i64 %inc, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+
+; CHECK-LABEL: fptrunc_ctrloop_fp128
+; CHECK-NOT: call void @llvm.set.loop.iterations.i64(i64 4)
+; CHECK-NOT: call i1 @llvm.loop.decrement.i64(i64 1)
+}
+
 declare void @obfuscate(i8*, ...) local_unnamed_addr #2