annotate test/CodeGen/X86/unaligned-32-byte-memops.ll @ 95:afa8332a0e37 LLVM3.8

LLVM 3.8
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Tue, 13 Oct 2015 17:48:58 +0900
parents 60c9769439b8
children 7d135dc70f03
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,+slow-unaligned-mem-32 | FileCheck %s --check-prefix=AVXSLOW
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,-slow-unaligned-mem-32 | FileCheck %s --check-prefix=AVXFAST
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=AVX2
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
4
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
5 ; Don't generate an unaligned 32-byte load on this test if that is slower than two 16-byte loads.
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 define <8 x float> @load32bytes(<8 x float>* %Ap) {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
8 ; AVXSLOW-LABEL: load32bytes:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
9 ; AVXSLOW: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
10 ; AVXSLOW-NEXT: vmovaps (%rdi), %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
11 ; AVXSLOW-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
12 ; AVXSLOW-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
13 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
14 ; AVXFAST-LABEL: load32bytes:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
15 ; AVXFAST: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
16 ; AVXFAST-NEXT: vmovups (%rdi), %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
17 ; AVXFAST-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
18 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
19 ; AVX2-LABEL: load32bytes:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
20 ; AVX2: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
21 ; AVX2-NEXT: vmovups (%rdi), %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
22 ; AVX2-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
23 %A = load <8 x float>, <8 x float>* %Ap, align 16
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 ret <8 x float> %A
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 }
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
27 ; Don't generate an unaligned 32-byte store on this test if that is slower than two 16-byte loads.
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
28
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 define void @store32bytes(<8 x float> %A, <8 x float>* %P) {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
30 ; AVXSLOW-LABEL: store32bytes:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
31 ; AVXSLOW: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
32 ; AVXSLOW-NEXT: vextractf128 $1, %ymm0, 16(%rdi)
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
33 ; AVXSLOW-NEXT: vmovaps %xmm0, (%rdi)
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
34 ; AVXSLOW-NEXT: vzeroupper
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
35 ; AVXSLOW-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
36 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
37 ; AVXFAST-LABEL: store32bytes:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
38 ; AVXFAST: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
39 ; AVXFAST-NEXT: vmovups %ymm0, (%rdi)
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
40 ; AVXFAST-NEXT: vzeroupper
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
41 ; AVXFAST-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
42 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
43 ; AVX2-LABEL: store32bytes:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
44 ; AVX2: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
45 ; AVX2-NEXT: vmovups %ymm0, (%rdi)
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
46 ; AVX2-NEXT: vzeroupper
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
47 ; AVX2-NEXT: retq
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 store <8 x float> %A, <8 x float>* %P, align 16
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 ret void
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 }
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
51
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
52 ; Merge two consecutive 16-byte subvector loads into a single 32-byte load if it's faster.
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
53
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 define <8 x float> @combine_16_byte_loads_no_intrinsic(<4 x float>* %ptr) {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
55 ; AVXSLOW-LABEL: combine_16_byte_loads_no_intrinsic:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
56 ; AVXSLOW: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
57 ; AVXSLOW-NEXT: vmovups 48(%rdi), %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
58 ; AVXSLOW-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
59 ; AVXSLOW-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
60 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
61 ; AVXFAST-LABEL: combine_16_byte_loads_no_intrinsic:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
62 ; AVXFAST: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
63 ; AVXFAST-NEXT: vmovups 48(%rdi), %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
64 ; AVXFAST-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
65 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
66 ; AVX2-LABEL: combine_16_byte_loads_no_intrinsic:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
67 ; AVX2: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
68 ; AVX2-NEXT: vmovups 48(%rdi), %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
69 ; AVX2-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
70 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
71 %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
72 %v1 = load <4 x float>, <4 x float>* %ptr1, align 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
73 %v2 = load <4 x float>, <4 x float>* %ptr2, align 1
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
75 ret <8 x float> %v3
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 }
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
77
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
78 ; If the first load is 32-byte aligned, then the loads should be merged in all cases.
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
79
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
80 define <8 x float> @combine_16_byte_loads_aligned(<4 x float>* %ptr) {
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
81 ; AVXSLOW-LABEL: combine_16_byte_loads_aligned:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
82 ; AVXSLOW: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
83 ; AVXSLOW-NEXT: vmovaps 48(%rdi), %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
84 ; AVXSLOW-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
85 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
86 ; AVXFAST-LABEL: combine_16_byte_loads_aligned:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
87 ; AVXFAST: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
88 ; AVXFAST-NEXT: vmovaps 48(%rdi), %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
89 ; AVXFAST-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
90 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
91 ; AVX2-LABEL: combine_16_byte_loads_aligned:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
92 ; AVX2: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
93 ; AVX2-NEXT: vmovaps 48(%rdi), %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
94 ; AVX2-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
95 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
96 %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
97 %v1 = load <4 x float>, <4 x float>* %ptr1, align 32
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
98 %v2 = load <4 x float>, <4 x float>* %ptr2, align 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
99 %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
100 ret <8 x float> %v3
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
101 }
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
102
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
103 ; Swap the order of the shufflevector operands to ensure that the pattern still matches.
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
104
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
105 define <8 x float> @combine_16_byte_loads_no_intrinsic_swap(<4 x float>* %ptr) {
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
106 ; AVXSLOW-LABEL: combine_16_byte_loads_no_intrinsic_swap:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
107 ; AVXSLOW: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
108 ; AVXSLOW-NEXT: vmovups 64(%rdi), %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
109 ; AVXSLOW-NEXT: vinsertf128 $1, 80(%rdi), %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
110 ; AVXSLOW-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
111 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
112 ; AVXFAST-LABEL: combine_16_byte_loads_no_intrinsic_swap:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
113 ; AVXFAST: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
114 ; AVXFAST-NEXT: vmovups 64(%rdi), %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
115 ; AVXFAST-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
116 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
117 ; AVX2-LABEL: combine_16_byte_loads_no_intrinsic_swap:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
118 ; AVX2: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
119 ; AVX2-NEXT: vmovups 64(%rdi), %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
120 ; AVX2-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
121 %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
122 %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 5
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
123 %v1 = load <4 x float>, <4 x float>* %ptr1, align 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
124 %v2 = load <4 x float>, <4 x float>* %ptr2, align 1
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
125 %v3 = shufflevector <4 x float> %v2, <4 x float> %v1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
126 ret <8 x float> %v3
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
127 }
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
128
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
129 ; Check each element type other than float to make sure it is handled correctly.
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
130 ; Use the loaded values with an 'add' to make sure we're using the correct load type.
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
131 ; Don't generate 32-byte loads for integer ops unless we have AVX2.
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
132
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
133 define <4 x i64> @combine_16_byte_loads_i64(<2 x i64>* %ptr, <4 x i64> %x) {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
134 ; AVXSLOW-LABEL: combine_16_byte_loads_i64:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
135 ; AVXSLOW: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
136 ; AVXSLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
137 ; AVXSLOW-NEXT: vpaddq 96(%rdi), %xmm1, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
138 ; AVXSLOW-NEXT: vpaddq 80(%rdi), %xmm0, %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
139 ; AVXSLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
140 ; AVXSLOW-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
141 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
142 ; AVXFAST-LABEL: combine_16_byte_loads_i64:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
143 ; AVXFAST: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
144 ; AVXFAST-NEXT: vextractf128 $1, %ymm0, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
145 ; AVXFAST-NEXT: vpaddq 96(%rdi), %xmm1, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
146 ; AVXFAST-NEXT: vpaddq 80(%rdi), %xmm0, %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
147 ; AVXFAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
148 ; AVXFAST-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
149 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
150 ; AVX2-LABEL: combine_16_byte_loads_i64:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
151 ; AVX2: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
152 ; AVX2-NEXT: vpaddq 80(%rdi), %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
153 ; AVX2-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
154 %ptr1 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 5
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
155 %ptr2 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 6
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
156 %v1 = load <2 x i64>, <2 x i64>* %ptr1, align 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
157 %v2 = load <2 x i64>, <2 x i64>* %ptr2, align 1
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
158 %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
159 %v4 = add <4 x i64> %v3, %x
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
160 ret <4 x i64> %v4
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
161 }
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
162
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
163 define <8 x i32> @combine_16_byte_loads_i32(<4 x i32>* %ptr, <8 x i32> %x) {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
164 ; AVXSLOW-LABEL: combine_16_byte_loads_i32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
165 ; AVXSLOW: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
166 ; AVXSLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
167 ; AVXSLOW-NEXT: vpaddd 112(%rdi), %xmm1, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
168 ; AVXSLOW-NEXT: vpaddd 96(%rdi), %xmm0, %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
169 ; AVXSLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
170 ; AVXSLOW-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
171 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
172 ; AVXFAST-LABEL: combine_16_byte_loads_i32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
173 ; AVXFAST: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
174 ; AVXFAST-NEXT: vextractf128 $1, %ymm0, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
175 ; AVXFAST-NEXT: vpaddd 112(%rdi), %xmm1, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
176 ; AVXFAST-NEXT: vpaddd 96(%rdi), %xmm0, %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
177 ; AVXFAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
178 ; AVXFAST-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
179 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
180 ; AVX2-LABEL: combine_16_byte_loads_i32:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
181 ; AVX2: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
182 ; AVX2-NEXT: vpaddd 96(%rdi), %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
183 ; AVX2-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
184 %ptr1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 6
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
185 %ptr2 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 7
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
186 %v1 = load <4 x i32>, <4 x i32>* %ptr1, align 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
187 %v2 = load <4 x i32>, <4 x i32>* %ptr2, align 1
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
188 %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
189 %v4 = add <8 x i32> %v3, %x
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
190 ret <8 x i32> %v4
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
191 }
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
192
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
193 define <16 x i16> @combine_16_byte_loads_i16(<8 x i16>* %ptr, <16 x i16> %x) {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
194 ; AVXSLOW-LABEL: combine_16_byte_loads_i16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
195 ; AVXSLOW: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
196 ; AVXSLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
197 ; AVXSLOW-NEXT: vpaddw 128(%rdi), %xmm1, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
198 ; AVXSLOW-NEXT: vpaddw 112(%rdi), %xmm0, %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
199 ; AVXSLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
200 ; AVXSLOW-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
201 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
202 ; AVXFAST-LABEL: combine_16_byte_loads_i16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
203 ; AVXFAST: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
204 ; AVXFAST-NEXT: vextractf128 $1, %ymm0, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
205 ; AVXFAST-NEXT: vpaddw 128(%rdi), %xmm1, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
206 ; AVXFAST-NEXT: vpaddw 112(%rdi), %xmm0, %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
207 ; AVXFAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
208 ; AVXFAST-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
209 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
210 ; AVX2-LABEL: combine_16_byte_loads_i16:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
211 ; AVX2: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
212 ; AVX2-NEXT: vpaddw 112(%rdi), %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
213 ; AVX2-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
214 %ptr1 = getelementptr inbounds <8 x i16>, <8 x i16>* %ptr, i64 7
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
215 %ptr2 = getelementptr inbounds <8 x i16>, <8 x i16>* %ptr, i64 8
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
216 %v1 = load <8 x i16>, <8 x i16>* %ptr1, align 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
217 %v2 = load <8 x i16>, <8 x i16>* %ptr2, align 1
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
218 %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
219 %v4 = add <16 x i16> %v3, %x
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
220 ret <16 x i16> %v4
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
221 }
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
222
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
223 define <32 x i8> @combine_16_byte_loads_i8(<16 x i8>* %ptr, <32 x i8> %x) {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
224 ; AVXSLOW-LABEL: combine_16_byte_loads_i8:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
225 ; AVXSLOW: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
226 ; AVXSLOW-NEXT: vextractf128 $1, %ymm0, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
227 ; AVXSLOW-NEXT: vpaddb 144(%rdi), %xmm1, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
228 ; AVXSLOW-NEXT: vpaddb 128(%rdi), %xmm0, %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
229 ; AVXSLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
230 ; AVXSLOW-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
231 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
232 ; AVXFAST-LABEL: combine_16_byte_loads_i8:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
233 ; AVXFAST: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
234 ; AVXFAST-NEXT: vextractf128 $1, %ymm0, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
235 ; AVXFAST-NEXT: vpaddb 144(%rdi), %xmm1, %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
236 ; AVXFAST-NEXT: vpaddb 128(%rdi), %xmm0, %xmm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
237 ; AVXFAST-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
238 ; AVXFAST-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
239 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
240 ; AVX2-LABEL: combine_16_byte_loads_i8:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
241 ; AVX2: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
242 ; AVX2-NEXT: vpaddb 128(%rdi), %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
243 ; AVX2-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
244 %ptr1 = getelementptr inbounds <16 x i8>, <16 x i8>* %ptr, i64 8
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
245 %ptr2 = getelementptr inbounds <16 x i8>, <16 x i8>* %ptr, i64 9
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
246 %v1 = load <16 x i8>, <16 x i8>* %ptr1, align 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
247 %v2 = load <16 x i8>, <16 x i8>* %ptr2, align 1
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
248 %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
249 %v4 = add <32 x i8> %v3, %x
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
250 ret <32 x i8> %v4
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
251 }
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
252
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
253 define <4 x double> @combine_16_byte_loads_double(<2 x double>* %ptr, <4 x double> %x) {
95
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
254 ; AVXSLOW-LABEL: combine_16_byte_loads_double:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
255 ; AVXSLOW: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
256 ; AVXSLOW-NEXT: vmovupd 144(%rdi), %xmm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
257 ; AVXSLOW-NEXT: vinsertf128 $1, 160(%rdi), %ymm1, %ymm1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
258 ; AVXSLOW-NEXT: vaddpd %ymm0, %ymm1, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
259 ; AVXSLOW-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
260 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
261 ; AVXFAST-LABEL: combine_16_byte_loads_double:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
262 ; AVXFAST: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
263 ; AVXFAST-NEXT: vaddpd 144(%rdi), %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
264 ; AVXFAST-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
265 ;
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
266 ; AVX2-LABEL: combine_16_byte_loads_double:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
267 ; AVX2: # BB#0:
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
268 ; AVX2-NEXT: vaddpd 144(%rdi), %ymm0, %ymm0
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
269 ; AVX2-NEXT: retq
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
270 %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 9
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
271 %ptr2 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 10
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
272 %v1 = load <2 x double>, <2 x double>* %ptr1, align 1
afa8332a0e37 LLVM 3.8
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents: 83
diff changeset
273 %v2 = load <2 x double>, <2 x double>* %ptr2, align 1
83
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
274 %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
275 %v4 = fadd <4 x double> %v3, %x
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
276 ret <4 x double> %v4
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
277 }
60c9769439b8 LLVM 3.7
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents:
diff changeset
278