summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
blob: 7a5f1eae341f8fedd180a8e15b004a2c14c77ae6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck %s

; Test to ensure recursive functions exhibit proper behaviour
; Test to generate fibonacci numbers

define i32 @fib(i32 %n) #0 {
; CHECK-LABEL: define {{[^@]+}}@fib
; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
; CHECK:       cont1:
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[N]], 1
; CHECK-NEXT:    br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
; CHECK:       cont2:
; CHECK-NEXT:    [[NM1:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT:    [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]])
; CHECK-NEXT:    [[NM2:%.*]] = sub i32 [[N]], 2
; CHECK-NEXT:    [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]])
; CHECK-NEXT:    [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
; CHECK-NEXT:    ret i32 [[RETVAL]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 1
;
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %exit, label %cont1

cont1:
  %cmp2 = icmp eq i32 %n, 1
  br i1 %cmp2, label %exit, label %cont2

cont2:
  %nm1 = sub i32 %n, 1
  %fibm1 = call i32 @fib(i32 %nm1)
  %nm2 = sub i32 %n, 2
  %fibm2 = call i32 @fib(i32 %nm2)
  %retval = add i32 %fibm1, %fibm2

  ret i32 %retval

exit:
  ret i32 1
}

define internal i32 @fib_internal(i32 %n) #0 {
; CHECK-LABEL: define {{[^@]+}}@fib_internal
; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
; CHECK:       cont1:
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[N]], 1
; CHECK-NEXT:    br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
; CHECK:       cont2:
; CHECK-NEXT:    [[NM1:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT:    [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]])
; CHECK-NEXT:    [[NM2:%.*]] = sub i32 [[N]], 2
; CHECK-NEXT:    [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]])
; CHECK-NEXT:    [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
; CHECK-NEXT:    ret i32 [[RETVAL]]
; CHECK:       exit:
; CHECK-NEXT:    ret i32 1
;
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %exit, label %cont1

cont1:
  %cmp2 = icmp eq i32 %n, 1
  br i1 %cmp2, label %exit, label %cont2

cont2:
  %nm1 = sub i32 %n, 1
  %fibm1 = call i32 @fib_internal(i32 %nm1)
  %nm2 = sub i32 %n, 2
  %fibm2 = call i32 @fib_internal(i32 %nm2)
  %retval = add i32 %fibm1, %fibm2

  ret i32 %retval

exit:
  ret i32 1
}

define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
; CHECK-LABEL: define {{[^@]+}}@kernel
; CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT:    [[R:%.*]] = call i32 @fib(i32 5)
; CHECK-NEXT:    [[R2:%.*]] = call i32 @fib_internal(i32 5)
; CHECK-NEXT:    store i32 [[R]], i32 addrspace(1)* [[M]], align 4
; CHECK-NEXT:    store i32 [[R2]], i32 addrspace(1)* [[M]], align 4
; CHECK-NEXT:    ret void
;
  %r = call i32 @fib(i32 5)
  %r2 = call i32 @fib_internal(i32 5)

  store i32 %r, i32 addrspace(1)* %m
  store i32 %r2, i32 addrspace(1)* %m
  ret void
}

; nounwind and readnone are added to match attributor results.
attributes #0 = { nounwind readnone }
attributes #1 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { nounwind readnone "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.