diff options
author | Jay Foad <jay.foad@amd.com> | 2022-02-03 16:02:39 +0000 |
---|---|---|
committer | Jay Foad <jay.foad@amd.com> | 2022-02-03 16:10:51 +0000 |
commit | 42fc05e09c38460d149e8097a3cb1e1f481e7ac2 (patch) | |
tree | f00539a504b70e8275fa08523f97192de39c4875 | |
parent | cf2bc63a1bb36027d79870091c93068f0e9e0f0a (diff) |
[AMDGPU] Tweak tests in noclobber-barrier.ll
Tweak some of the tests to demonstrate
AMDGPUAnnotateUniformValues::visitLoadInst inserting a trivial
getelementptr instruction, just to have somewhere to put amdgpu.uniform
metadata. NFC.
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll | 66 |
1 file changed, 26 insertions, 40 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll index 691d699707c8..afff30bdb521 100644 --- a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll @@ -357,12 +357,12 @@ bb: define protected amdgpu_kernel void @no_alias_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { ; CHECK-LABEL: @no_alias_store( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 ; CHECK-NEXT: store i32 0, i32 addrspace(3)* @LDS, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -371,8 +371,7 @@ entry: fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0 - %ld = load i32, i32 addrspace(1)* %gep, align 4 + %ld = load i32, i32 addrspace(1)* %in, align 4 store i32 %ld, i32 addrspace(1)* %out, align 4 ret void } @@ -389,8 +388,7 @@ define protected amdgpu_kernel void @may_alias_store(i32 addrspace(1)* %in, i32 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32 
addrspace(1)* [[IN:%.*]], align 4 ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4 ; CHECK-NEXT: ret void ; @@ -399,8 +397,7 @@ entry: fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0 - %ld = load i32, i32 addrspace(1)* %gep, align 4 + %ld = load i32, i32 addrspace(1)* %in, align 4 store i32 %ld, i32 addrspace(1)* %out, align 4 ret void } @@ -414,12 +411,12 @@ entry: define protected amdgpu_kernel void @no_alias_volatile_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { ; CHECK-LABEL: @no_alias_volatile_store( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 ; CHECK-NEXT: store volatile i32 0, i32 addrspace(3)* @LDS, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -428,8 +425,7 @@ entry: fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0 - %ld = load i32, i32 addrspace(1)* %gep, align 4 + %ld = load i32, i32 addrspace(1)* %in, align 4 store i32 %ld, i32 addrspace(1)* %out, align 4 ret void } @@ -442,16 +438,15 @@ entry: define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { ; CHECK-LABEL: @no_alias_atomic_rmw_relaxed( ; CHECK-NEXT: entry: +; 
CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 ; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 monotonic, align 4 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; entry: %unused = atomicrmw add i32 addrspace(3)* @LDS, i32 5 monotonic - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0 - %ld = load i32, i32 addrspace(1)* %gep, align 4 + %ld = load i32, i32 addrspace(1)* %in, align 4 store i32 %ld, i32 addrspace(1)* %out, align 4 ret void } @@ -464,12 +459,12 @@ entry: define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 %swap) { ; CHECK-LABEL: @no_alias_atomic_cmpxchg( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 ; CHECK-NEXT: [[UNUSED:%.*]] = cmpxchg i32 addrspace(3)* @LDS, i32 7, i32 [[SWAP:%.*]] seq_cst monotonic, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -478,8 +473,7 @@ entry: fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence 
syncscope("workgroup") acquire - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0 - %ld = load i32, i32 addrspace(1)* %gep, align 4 + %ld = load i32, i32 addrspace(1)* %in, align 4 store i32 %ld, i32 addrspace(1)* %out, align 4 ret void } @@ -492,12 +486,12 @@ entry: define protected amdgpu_kernel void @no_alias_atomic_rmw(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { ; CHECK-LABEL: @no_alias_atomic_rmw( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 ; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -506,8 +500,7 @@ entry: fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0 - %ld = load i32, i32 addrspace(1)* %gep, align 4 + %ld = load i32, i32 addrspace(1)* %in, align 4 store i32 %ld, i32 addrspace(1)* %out, align 4 ret void } @@ -523,8 +516,7 @@ define protected amdgpu_kernel void @may_alias_atomic_cmpxchg(i32 addrspace(1)* ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], 
align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4 ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4 ; CHECK-NEXT: ret void ; @@ -533,8 +525,7 @@ entry: fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0 - %ld = load i32, i32 addrspace(1)* %gep, align 4 + %ld = load i32, i32 addrspace(1)* %in, align 4 store i32 %ld, i32 addrspace(1)* %out, align 4 ret void } @@ -550,8 +541,7 @@ define protected amdgpu_kernel void @may_alias_atomic_rmw(i32 addrspace(1)* %in, ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4 ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4 ; CHECK-NEXT: ret void ; @@ -560,8 +550,7 @@ entry: fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0 - %ld = load i32, i32 addrspace(1)* %gep, align 4 + %ld = load i32, i32 addrspace(1)* %in, align 4 store i32 %ld, i32 addrspace(1)* %out, align 4 ret void } @@ -581,8 +570,7 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw_then_clobber(i32 addrsp ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32 
addrspace(1)* [[IN:%.*]], align 4 ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4 ; CHECK-NEXT: ret void ; @@ -593,8 +581,7 @@ entry: fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0 - %ld = load i32, i32 addrspace(1)* %gep, align 4 + %ld = load i32, i32 addrspace(1)* %in, align 4 store i32 %ld, i32 addrspace(1)* %out, align 4 ret void } @@ -608,13 +595,13 @@ entry: define protected amdgpu_kernel void @no_alias_atomic_rmw_then_no_alias_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 addrspace(1)* noalias %noalias) { ; CHECK-LABEL: @no_alias_atomic_rmw_then_no_alias_store( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 ; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[NOALIAS:%.*]], align 4 ; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst, align 4 ; CHECK-NEXT: fence syncscope("workgroup") release ; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier() ; CHECK-NEXT: fence syncscope("workgroup") acquire -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0 -; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0 +; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0 ; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; @@ -624,8 +611,7 @@ entry: fence syncscope("workgroup") release tail call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0 - %ld = load i32, i32 addrspace(1)* %gep, align 4 + %ld = load i32, i32 addrspace(1)* %in, align 4 store i32 %ld, i32 addrspace(1)* %out, align 4 ret void } |