author     Jay Foad <jay.foad@amd.com>    2022-02-03 16:02:39 +0000
committer  Jay Foad <jay.foad@amd.com>    2022-02-03 16:10:51 +0000
commit     42fc05e09c38460d149e8097a3cb1e1f481e7ac2 (patch)
tree       f00539a504b70e8275fa08523f97192de39c4875
parent     cf2bc63a1bb36027d79870091c93068f0e9e0f0a (diff)
[AMDGPU] Tweak tests in noclobber-barrier.ll
Tweak some of the tests to demonstrate AMDGPUAnnotateUniformValues::visitLoadInst inserting a trivial getelementptr instruction, just to have somewhere to put amdgpu.uniform metadata. NFC.
-rw-r--r--  llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll  | 66
1 file changed, 26 insertions(+), 40 deletions(-)
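For context, the "trivial getelementptr" mentioned in the commit message is, as I understand it, emitted by AMDGPUAnnotateUniformValues::visitLoadInst when a uniform load's pointer operand is not itself an instruction (e.g. it is a kernel argument): the pass needs an instruction on which to hang the !amdgpu.uniform metadata, so it materialises a zero-index GEP in the entry block and reroutes the load through it. The sketch below illustrates that idea only; the function name, insertion point, and helper structure are illustrative assumptions, not the pass's actual code.

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    // Sketch: mark a uniform load's address as uniform. If the pointer is not
    // an instruction, insert a trivial zero-index GEP just to carry the metadata.
    static void annotateUniformLoad(LoadInst &I) {
      Value *Ptr = I.getPointerOperand();
      LLVMContext &Ctx = I.getContext();
      MDNode *Empty = MDNode::get(Ctx, {});            // the "!0" node seen in the tests

      if (auto *PtrI = dyn_cast<Instruction>(Ptr)) {
        // An existing instruction (e.g. a real GEP) can carry the metadata itself.
        PtrI->setMetadata("amdgpu.uniform", Empty);
        return;
      }

      // Pointer is e.g. a kernel argument: create "getelementptr i32, ptr %in, i64 0"
      // at the top of the entry block, tag it, and make the load use it instead.
      Function *F = I.getFunction();
      Instruction *InsertPt = &*F->getEntryBlock().getFirstInsertionPt();
      Value *Zero = ConstantInt::get(Type::getInt64Ty(Ctx), 0);
      auto *GEP = GetElementPtrInst::Create(I.getType(), Ptr, {Zero}, "", InsertPt);
      GEP->setMetadata("amdgpu.uniform", Empty);
      I.replaceUsesOfWith(Ptr, GEP);
    }

This matches what the updated CHECK lines in the diff below expect: once the hand-written "getelementptr inbounds ... i64 0" is removed from the test input, the pass itself introduces the [[TMP0]] GEP at the top of the entry block to hold !amdgpu.uniform.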
diff --git a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
index 691d699707c8..afff30bdb521 100644
--- a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -357,12 +357,12 @@ bb:
define protected amdgpu_kernel void @no_alias_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
; CHECK-LABEL: @no_alias_store(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
; CHECK-NEXT: store i32 0, i32 addrspace(3)* @LDS, align 4
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@@ -371,8 +371,7 @@ entry:
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
- %ld = load i32, i32 addrspace(1)* %gep, align 4
+ %ld = load i32, i32 addrspace(1)* %in, align 4
store i32 %ld, i32 addrspace(1)* %out, align 4
ret void
}
@@ -389,8 +388,7 @@ define protected amdgpu_kernel void @may_alias_store(i32 addrspace(1)* %in, i32
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4
; CHECK-NEXT: ret void
;
@@ -399,8 +397,7 @@ entry:
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
- %ld = load i32, i32 addrspace(1)* %gep, align 4
+ %ld = load i32, i32 addrspace(1)* %in, align 4
store i32 %ld, i32 addrspace(1)* %out, align 4
ret void
}
@@ -414,12 +411,12 @@ entry:
define protected amdgpu_kernel void @no_alias_volatile_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
; CHECK-LABEL: @no_alias_volatile_store(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
; CHECK-NEXT: store volatile i32 0, i32 addrspace(3)* @LDS, align 4
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@@ -428,8 +425,7 @@ entry:
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
- %ld = load i32, i32 addrspace(1)* %gep, align 4
+ %ld = load i32, i32 addrspace(1)* %in, align 4
store i32 %ld, i32 addrspace(1)* %out, align 4
ret void
}
@@ -442,16 +438,15 @@ entry:
define protected amdgpu_kernel void @no_alias_atomic_rmw_relaxed(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
; CHECK-LABEL: @no_alias_atomic_rmw_relaxed(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 monotonic, align 4
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
entry:
%unused = atomicrmw add i32 addrspace(3)* @LDS, i32 5 monotonic
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
- %ld = load i32, i32 addrspace(1)* %gep, align 4
+ %ld = load i32, i32 addrspace(1)* %in, align 4
store i32 %ld, i32 addrspace(1)* %out, align 4
ret void
}
@@ -464,12 +459,12 @@ entry:
define protected amdgpu_kernel void @no_alias_atomic_cmpxchg(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 %swap) {
; CHECK-LABEL: @no_alias_atomic_cmpxchg(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
; CHECK-NEXT: [[UNUSED:%.*]] = cmpxchg i32 addrspace(3)* @LDS, i32 7, i32 [[SWAP:%.*]] seq_cst monotonic, align 4
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@@ -478,8 +473,7 @@ entry:
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
- %ld = load i32, i32 addrspace(1)* %gep, align 4
+ %ld = load i32, i32 addrspace(1)* %in, align 4
store i32 %ld, i32 addrspace(1)* %out, align 4
ret void
}
@@ -492,12 +486,12 @@ entry:
define protected amdgpu_kernel void @no_alias_atomic_rmw(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
; CHECK-LABEL: @no_alias_atomic_rmw(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst, align 4
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@@ -506,8 +500,7 @@ entry:
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
- %ld = load i32, i32 addrspace(1)* %gep, align 4
+ %ld = load i32, i32 addrspace(1)* %in, align 4
store i32 %ld, i32 addrspace(1)* %out, align 4
ret void
}
@@ -523,8 +516,7 @@ define protected amdgpu_kernel void @may_alias_atomic_cmpxchg(i32 addrspace(1)*
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4
; CHECK-NEXT: ret void
;
@@ -533,8 +525,7 @@ entry:
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
- %ld = load i32, i32 addrspace(1)* %gep, align 4
+ %ld = load i32, i32 addrspace(1)* %in, align 4
store i32 %ld, i32 addrspace(1)* %out, align 4
ret void
}
@@ -550,8 +541,7 @@ define protected amdgpu_kernel void @may_alias_atomic_rmw(i32 addrspace(1)* %in,
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4
; CHECK-NEXT: ret void
;
@@ -560,8 +550,7 @@ entry:
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
- %ld = load i32, i32 addrspace(1)* %gep, align 4
+ %ld = load i32, i32 addrspace(1)* %in, align 4
store i32 %ld, i32 addrspace(1)* %out, align 4
ret void
}
@@ -581,8 +570,7 @@ define protected amdgpu_kernel void @no_alias_atomic_rmw_then_clobber(i32 addrsp
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[IN:%.*]], align 4
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT]], align 4
; CHECK-NEXT: ret void
;
@@ -593,8 +581,7 @@ entry:
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
- %ld = load i32, i32 addrspace(1)* %gep, align 4
+ %ld = load i32, i32 addrspace(1)* %in, align 4
store i32 %ld, i32 addrspace(1)* %out, align 4
ret void
}
@@ -608,13 +595,13 @@ entry:
define protected amdgpu_kernel void @no_alias_atomic_rmw_then_no_alias_store(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i32 addrspace(1)* noalias %noalias) {
; CHECK-LABEL: @no_alias_atomic_rmw_then_no_alias_store(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
; CHECK-NEXT: store i32 2, i32 addrspace(1)* [[NOALIAS:%.*]], align 4
; CHECK-NEXT: [[UNUSED:%.*]] = atomicrmw add i32 addrspace(3)* @LDS, i32 5 seq_cst, align 4
; CHECK-NEXT: fence syncscope("workgroup") release
; CHECK-NEXT: tail call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: fence syncscope("workgroup") acquire
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[IN:%.*]], i64 0, !amdgpu.uniform !0
-; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[GEP]], align 4, !amdgpu.noclobber !0
+; CHECK-NEXT: [[LD:%.*]] = load i32, i32 addrspace(1)* [[TMP0]], align 4, !amdgpu.noclobber !0
; CHECK-NEXT: store i32 [[LD]], i32 addrspace(1)* [[OUT:%.*]], align 4
; CHECK-NEXT: ret void
;
@@ -624,8 +611,7 @@ entry:
fence syncscope("workgroup") release
tail call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
- %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i64 0
- %ld = load i32, i32 addrspace(1)* %gep, align 4
+ %ld = load i32, i32 addrspace(1)* %in, align 4
store i32 %ld, i32 addrspace(1)* %out, align 4
ret void
}