diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll | 35 |
1 file changed, 16 insertions(+), 19 deletions(-)
diff --git a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll index f8390f581157..3996024a6a1f 100644 --- a/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-store-i8-stride-3.ll @@ -233,22 +233,22 @@ define void @store_i8_stride3_vf16(<16 x i8>* %in.vecptr0, <16 x i8>* %in.vecptr ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm8[2,1,3,3,4,5,6,7] ; SSE-NEXT: pshufhw {{.*#+}} xmm2 = xmm0[0,1,2,3,4,4,4,4] ; SSE-NEXT: movdqa {{.*#+}} xmm0 = [255,0,255,255,0,255,255,0,255,255,0,255,255,0,255,255] -; SSE-NEXT: movdqa %xmm0, %xmm3 -; SSE-NEXT: pandn %xmm2, %xmm3 +; SSE-NEXT: movdqa %xmm0, %xmm4 +; SSE-NEXT: pandn %xmm2, %xmm4 ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,1,2,3] ; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] ; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,2,2,3,4,5,6,7] ; SSE-NEXT: pshufhw {{.*#+}} xmm6 = xmm2[0,1,2,3,4,5,5,6] ; SSE-NEXT: pand %xmm0, %xmm6 -; SSE-NEXT: por %xmm3, %xmm6 +; SSE-NEXT: por %xmm4, %xmm6 ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,0,255,255,0,255,255,0,255,255,0,255,255,0,255] ; SSE-NEXT: pand %xmm2, %xmm6 -; SSE-NEXT: pshuflw {{.*#+}} xmm3 = xmm5[3,3,3,3,4,5,6,7] -; SSE-NEXT: pshufhw {{.*#+}} xmm7 = xmm3[0,1,2,3,4,4,6,5] -; SSE-NEXT: movdqa %xmm2, %xmm3 -; SSE-NEXT: pandn %xmm7, %xmm3 -; SSE-NEXT: por %xmm6, %xmm3 +; SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm5[3,3,3,3,4,5,6,7] +; SSE-NEXT: pshufhw {{.*#+}} xmm7 = xmm4[0,1,2,3,4,4,6,5] +; SSE-NEXT: movdqa %xmm2, %xmm4 +; SSE-NEXT: pandn %xmm7, %xmm4 +; SSE-NEXT: por %xmm6, %xmm4 ; SSE-NEXT: movdqa %xmm1, %xmm6 ; SSE-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm1[0],xmm6[1],xmm1[1],xmm6[2],xmm1[2],xmm6[3],xmm1[3],xmm6[4],xmm1[4],xmm6[5],xmm1[5],xmm6[6],xmm1[6],xmm6[7],xmm1[7] ; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,1,1,2] @@ -264,9 +264,9 @@ define void @store_i8_stride3_vf16(<16 x i8>* 
%in.vecptr0, <16 x i8>* %in.vecptr ; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm8[0,1,0,1] ; SSE-NEXT: pshuflw {{.*#+}} xmm7 = xmm7[0,0,0,0,4,5,6,7] ; SSE-NEXT: pshufhw {{.*#+}} xmm7 = xmm7[0,1,2,3,5,5,6,6] -; SSE-NEXT: movdqa %xmm2, %xmm4 -; SSE-NEXT: pandn %xmm7, %xmm4 -; SSE-NEXT: por %xmm6, %xmm4 +; SSE-NEXT: movdqa %xmm2, %xmm3 +; SSE-NEXT: pandn %xmm7, %xmm3 +; SSE-NEXT: por %xmm6, %xmm3 ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3] ; SSE-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[1,1,2,2,4,5,6,7] ; SSE-NEXT: pshufhw {{.*#+}} xmm5 = xmm5[0,1,2,3,7,7,7,7] @@ -278,14 +278,13 @@ define void @store_i8_stride3_vf16(<16 x i8>* %in.vecptr0, <16 x i8>* %in.vecptr ; SSE-NEXT: por %xmm6, %xmm0 ; SSE-NEXT: pand %xmm2, %xmm0 ; SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,2,3] -; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,3,0,3,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,2,2,3] ; SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,6,6,7] ; SSE-NEXT: pandn %xmm1, %xmm2 ; SSE-NEXT: por %xmm0, %xmm2 ; SSE-NEXT: movdqa %xmm2, 32(%rcx) -; SSE-NEXT: movdqa %xmm4, (%rcx) -; SSE-NEXT: movdqa %xmm3, 16(%rcx) +; SSE-NEXT: movdqa %xmm3, (%rcx) +; SSE-NEXT: movdqa %xmm4, 16(%rcx) ; SSE-NEXT: retq ; ; AVX-LABEL: store_i8_stride3_vf16: @@ -403,8 +402,7 @@ define void @store_i8_stride3_vf32(<32 x i8>* %in.vecptr0, <32 x i8>* %in.vecptr ; SSE-NEXT: pand %xmm4, %xmm1 ; SSE-NEXT: movdqa %xmm7, %xmm2 ; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm7[8],xmm2[9],xmm7[9],xmm2[10],xmm7[10],xmm2[11],xmm7[11],xmm2[12],xmm7[12],xmm2[13],xmm7[13],xmm2[14],xmm7[14],xmm2[15],xmm7[15] -; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,2,3] -; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,3,0,3,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,2,2,3] ; SSE-NEXT: pshufhw {{.*#+}} xmm3 = xmm2[0,1,2,3,5,6,6,7] ; SSE-NEXT: movdqa %xmm4, %xmm2 ; SSE-NEXT: pandn %xmm3, %xmm2 @@ -439,8 +437,7 @@ define void @store_i8_stride3_vf32(<32 x i8>* 
%in.vecptr0, <32 x i8>* %in.vecptr ; SSE-NEXT: pand %xmm4, %xmm3 ; SSE-NEXT: movdqa %xmm13, %xmm7 ; SSE-NEXT: punpckhbw {{.*#+}} xmm7 = xmm7[8],xmm13[8],xmm7[9],xmm13[9],xmm7[10],xmm13[10],xmm7[11],xmm13[11],xmm7[12],xmm13[12],xmm7[13],xmm13[13],xmm7[14],xmm13[14],xmm7[15],xmm13[15] -; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm7[2,1,2,3] -; SSE-NEXT: pshuflw {{.*#+}} xmm7 = xmm7[0,3,0,3,4,5,6,7] +; SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,2,2,3] ; SSE-NEXT: pshufhw {{.*#+}} xmm7 = xmm7[0,1,2,3,5,6,6,7] ; SSE-NEXT: movdqa %xmm4, %xmm5 ; SSE-NEXT: pandn %xmm7, %xmm5 |