Diffstat (limited to 'llvm/test/CodeGen/X86/extractelement-load.ll')
-rw-r--r--  llvm/test/CodeGen/X86/extractelement-load.ll  151
1 file changed, 147 insertions, 4 deletions
diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index 2a7ed3a8b4e7..138d60b05ba9 100644
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=X64,X64-SSSE3
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2
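+; The shared X64-AVX prefix is split into X64-AVX1/X64-AVX2 because the new
+; @main test below materializes its splat divisor differently on the two
+; targets (vmovaps of a constant vs. vbroadcastss), so those check lines diverge.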
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -268,7 +268,7 @@ entry:
}
; Test for bad extractions from a VBROADCAST_LOAD of the <2 x i16> non-uniform constant bitcast as <4 x i32>.
-define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* nocapture %1, i16* nocapture %2) {
+define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* nocapture %1, i16* nocapture %2) nounwind {
; X32-SSE2-LABEL: subextract_broadcast_load_constant:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -301,7 +301,7 @@ define void @subextract_broadcast_load_constant(<2 x i16>* nocapture %0, i16* no
ret void
}
-define i32 @multi_use_load_scalarization(<4 x i32>* %p) {
+define i32 @multi_use_load_scalarization(<4 x i32>* %p) nounwind {
; X32-SSE2-LABEL: multi_use_load_scalarization:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
@@ -335,3 +335,146 @@ define i32 @multi_use_load_scalarization(<4 x i32>* %p) {
%r = extractelement <4 x i32> %v, i64 0
ret i32 %r
}
+
+@n1 = local_unnamed_addr global <8 x i32> <i32 0, i32 42, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0>, align 32
+@zero = internal unnamed_addr global <8 x i32> zeroinitializer, align 32
+
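+; %z is loaded from @zero before @zero is clobbered with @n1's contents; if the
+; extracts of the udiv result were scalarized back into loads from @zero after
+; that store, they would read the clobbered values, so this likely guards
+; against that kind of mis-folding of extractelement with a stale load.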
+define i32 @main() nounwind {
+; X32-SSE2-LABEL: main:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: pushl %ebp
+; X32-SSE2-NEXT: movl %esp, %ebp
+; X32-SSE2-NEXT: pushl %esi
+; X32-SSE2-NEXT: andl $-32, %esp
+; X32-SSE2-NEXT: subl $64, %esp
+; X32-SSE2-NEXT: movdqa zero, %xmm0
+; X32-SSE2-NEXT: movaps n1+16, %xmm1
+; X32-SSE2-NEXT: movaps n1, %xmm2
+; X32-SSE2-NEXT: movaps %xmm2, zero
+; X32-SSE2-NEXT: movaps %xmm1, zero+16
+; X32-SSE2-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2]
+; X32-SSE2-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
+; X32-SSE2-NEXT: movaps %xmm1, (%esp)
+; X32-SSE2-NEXT: movdqa (%esp), %xmm1
+; X32-SSE2-NEXT: movaps {{[0-9]+}}(%esp), %xmm2
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
+; X32-SSE2-NEXT: movd %xmm2, %eax
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
+; X32-SSE2-NEXT: movd %xmm2, %ecx
+; X32-SSE2-NEXT: xorl %edx, %edx
+; X32-SSE2-NEXT: divl %ecx
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X32-SSE2-NEXT: movd %xmm0, %eax
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
+; X32-SSE2-NEXT: movd %xmm0, %esi
+; X32-SSE2-NEXT: xorl %edx, %edx
+; X32-SSE2-NEXT: divl %esi
+; X32-SSE2-NEXT: addl %ecx, %eax
+; X32-SSE2-NEXT: leal -4(%ebp), %esp
+; X32-SSE2-NEXT: popl %esi
+; X32-SSE2-NEXT: popl %ebp
+; X32-SSE2-NEXT: retl
+;
+; X64-SSSE3-LABEL: main:
+; X64-SSSE3: # %bb.0:
+; X64-SSSE3-NEXT: pushq %rbp
+; X64-SSSE3-NEXT: movq %rsp, %rbp
+; X64-SSSE3-NEXT: andq $-32, %rsp
+; X64-SSSE3-NEXT: subq $64, %rsp
+; X64-SSSE3-NEXT: movdqa zero(%rip), %xmm0
+; X64-SSSE3-NEXT: movq n1@GOTPCREL(%rip), %rax
+; X64-SSSE3-NEXT: movaps (%rax), %xmm1
+; X64-SSSE3-NEXT: movaps 16(%rax), %xmm2
+; X64-SSSE3-NEXT: movaps %xmm1, zero(%rip)
+; X64-SSSE3-NEXT: movaps %xmm2, zero+16(%rip)
+; X64-SSSE3-NEXT: movaps {{.*#+}} xmm1 = [2,2,2,2]
+; X64-SSSE3-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp)
+; X64-SSSE3-NEXT: movaps %xmm1, (%rsp)
+; X64-SSSE3-NEXT: movdqa (%rsp), %xmm1
+; X64-SSSE3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
+; X64-SSSE3-NEXT: movd %xmm2, %eax
+; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
+; X64-SSSE3-NEXT: movd %xmm2, %ecx
+; X64-SSSE3-NEXT: xorl %edx, %edx
+; X64-SSSE3-NEXT: divl %ecx
+; X64-SSSE3-NEXT: movl %eax, %ecx
+; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X64-SSSE3-NEXT: movd %xmm0, %eax
+; X64-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
+; X64-SSSE3-NEXT: movd %xmm0, %esi
+; X64-SSSE3-NEXT: xorl %edx, %edx
+; X64-SSSE3-NEXT: divl %esi
+; X64-SSSE3-NEXT: addl %ecx, %eax
+; X64-SSSE3-NEXT: movq %rbp, %rsp
+; X64-SSSE3-NEXT: popq %rbp
+; X64-SSSE3-NEXT: retq
+;
+; X64-AVX1-LABEL: main:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: pushq %rbp
+; X64-AVX1-NEXT: movq %rsp, %rbp
+; X64-AVX1-NEXT: andq $-32, %rsp
+; X64-AVX1-NEXT: subq $64, %rsp
+; X64-AVX1-NEXT: movq n1@GOTPCREL(%rip), %rax
+; X64-AVX1-NEXT: vmovaps (%rax), %ymm0
+; X64-AVX1-NEXT: vmovaps zero(%rip), %xmm1
+; X64-AVX1-NEXT: vmovaps %ymm0, zero(%rip)
+; X64-AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
+; X64-AVX1-NEXT: vmovaps %ymm0, (%rsp)
+; X64-AVX1-NEXT: vmovaps (%rsp), %ymm0
+; X64-AVX1-NEXT: vextractps $2, %xmm1, %eax
+; X64-AVX1-NEXT: vextractps $2, %xmm0, %ecx
+; X64-AVX1-NEXT: xorl %edx, %edx
+; X64-AVX1-NEXT: divl %ecx
+; X64-AVX1-NEXT: movl %eax, %ecx
+; X64-AVX1-NEXT: vextractps $1, %xmm1, %eax
+; X64-AVX1-NEXT: vextractps $1, %xmm0, %esi
+; X64-AVX1-NEXT: xorl %edx, %edx
+; X64-AVX1-NEXT: divl %esi
+; X64-AVX1-NEXT: addl %ecx, %eax
+; X64-AVX1-NEXT: movq %rbp, %rsp
+; X64-AVX1-NEXT: popq %rbp
+; X64-AVX1-NEXT: vzeroupper
+; X64-AVX1-NEXT: retq
+;
+; X64-AVX2-LABEL: main:
+; X64-AVX2: # %bb.0:
+; X64-AVX2-NEXT: pushq %rbp
+; X64-AVX2-NEXT: movq %rsp, %rbp
+; X64-AVX2-NEXT: andq $-32, %rsp
+; X64-AVX2-NEXT: subq $64, %rsp
+; X64-AVX2-NEXT: movq n1@GOTPCREL(%rip), %rax
+; X64-AVX2-NEXT: vmovaps (%rax), %ymm0
+; X64-AVX2-NEXT: vmovaps zero(%rip), %xmm1
+; X64-AVX2-NEXT: vmovaps %ymm0, zero(%rip)
+; X64-AVX2-NEXT: vbroadcastss {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
+; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
+; X64-AVX2-NEXT: vmovaps (%rsp), %ymm0
+; X64-AVX2-NEXT: vextractps $2, %xmm1, %eax
+; X64-AVX2-NEXT: vextractps $2, %xmm0, %ecx
+; X64-AVX2-NEXT: xorl %edx, %edx
+; X64-AVX2-NEXT: divl %ecx
+; X64-AVX2-NEXT: movl %eax, %ecx
+; X64-AVX2-NEXT: vextractps $1, %xmm1, %eax
+; X64-AVX2-NEXT: vextractps $1, %xmm0, %esi
+; X64-AVX2-NEXT: xorl %edx, %edx
+; X64-AVX2-NEXT: divl %esi
+; X64-AVX2-NEXT: addl %ecx, %eax
+; X64-AVX2-NEXT: movq %rbp, %rsp
+; X64-AVX2-NEXT: popq %rbp
+; X64-AVX2-NEXT: vzeroupper
+; X64-AVX2-NEXT: retq
+ %stackptr = alloca <8 x i32>, align 32
+ %z = load <8 x i32>, <8 x i32>* @zero, align 32
+ %t1 = load <8 x i32>, <8 x i32>* @n1, align 32
+ store <8 x i32> %t1, <8 x i32>* @zero, align 32
+ store volatile <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <8 x i32>* %stackptr, align 32
+ %stackload = load volatile <8 x i32>, <8 x i32>* %stackptr, align 32
+ %div = udiv <8 x i32> %z, %stackload
+ %e1 = extractelement <8 x i32> %div, i64 1
+ %e2 = extractelement <8 x i32> %div, i64 2
+ %r = add i32 %e1, %e2
+ ret i32 %r
+}