77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 ; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
2
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3 ; Simple load of v4i16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 define <4 x half> @load_64(<4 x half>* nocapture readonly %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 ; CHECK-LABEL: load_64:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
6 ; CHECK: ldr d0, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
7 entry:
|
95
|
8 %0 = load <4 x half>, <4 x half>* %a, align 8
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
9 ret <4 x half> %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
10 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
11
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
12 ; Simple load of v8i16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
13 define <8 x half> @load_128(<8 x half>* nocapture readonly %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
14 ; CHECK-LABEL: load_128:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
15 ; CHECK: ldr q0, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
16 entry:
|
95
|
17 %0 = load <8 x half>, <8 x half>* %a, align 16
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
18 ret <8 x half> %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
19 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
20
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 ; Duplicating load to v4i16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 define <4 x half> @load_dup_64(half* nocapture readonly %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 ; CHECK-LABEL: load_dup_64:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 ; CHECK: ld1r { v0.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 entry:
|
95
|
26 %0 = load half, half* %a, align 2
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
27 %1 = insertelement <4 x half> undef, half %0, i32 0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
28 %2 = shufflevector <4 x half> %1, <4 x half> undef, <4 x i32> zeroinitializer
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
29 ret <4 x half> %2
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
30 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
31
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
32 ; Duplicating load to v8i16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
33 define <8 x half> @load_dup_128(half* nocapture readonly %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
34 ; CHECK-LABEL: load_dup_128:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
35 ; CHECK: ld1r { v0.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
36 entry:
|
95
|
37 %0 = load half, half* %a, align 2
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
38 %1 = insertelement <8 x half> undef, half %0, i32 0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
39 %2 = shufflevector <8 x half> %1, <8 x half> undef, <8 x i32> zeroinitializer
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
40 ret <8 x half> %2
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
41 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
42
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
43 ; Load to one lane of v4f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44 define <4 x half> @load_lane_64(half* nocapture readonly %a, <4 x half> %b) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
45 ; CHECK-LABEL: load_lane_64:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46 ; CHECK: ld1 { v0.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
47 entry:
|
95
|
48 %0 = load half, half* %a, align 2
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
49 %1 = insertelement <4 x half> %b, half %0, i32 2
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
50 ret <4 x half> %1
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
51 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
52
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
53 ; Load to one lane of v8f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
54 define <8 x half> @load_lane_128(half* nocapture readonly %a, <8 x half> %b) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
55 ; CHECK-LABEL: load_lane_128:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
56 ; CHECK: ld1 { v0.h }[5], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
57 entry:
|
95
|
58 %0 = load half, half* %a, align 2
|
77
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
59 %1 = insertelement <8 x half> %b, half %0, i32 5
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
60 ret <8 x half> %1
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
61 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
62
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
63 ; Simple store of v4f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
64 define void @store_64(<4 x half>* nocapture %a, <4 x half> %b) #1 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
65 ; CHECK-LABEL: store_64:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
66 ; CHECK: str d0, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
67 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
68 store <4 x half> %b, <4 x half>* %a, align 8
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
69 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
70 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
71
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
72 ; Simple store of v8f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
73 define void @store_128(<8 x half>* nocapture %a, <8 x half> %b) #1 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
74 ; CHECK-LABEL: store_128:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
75 ; CHECK: str q0, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
76 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
77 store <8 x half> %b, <8 x half>* %a, align 16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
78 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
79 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
80
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
81 ; Store from one lane of v4f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
82 define void @store_lane_64(half* nocapture %a, <4 x half> %b) #1 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
83 ; CHECK-LABEL: store_lane_64:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
84 ; CHECK: st1 { v0.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
85 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
86 %0 = extractelement <4 x half> %b, i32 2
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
87 store half %0, half* %a, align 2
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
88 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
89 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
90
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
91 ; Store from one lane of v8f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
92 define void @store_lane_128(half* nocapture %a, <8 x half> %b) #1 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
93 ; CHECK-LABEL: store_lane_128:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
94 ; CHECK: st1 { v0.h }[5], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
95 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
96 %0 = extractelement <8 x half> %b, i32 5
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
97 store half %0, half* %a, align 2
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
98 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
99 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
100
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
101 ; NEON intrinsics - (de-)interleaving loads and stores
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
102 declare { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
103 declare { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
104 declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
105 declare void @llvm.aarch64.neon.st2.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
106 declare void @llvm.aarch64.neon.st3.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
107 declare void @llvm.aarch64.neon.st4.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>, <4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
108 declare { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
109 declare { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
110 declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
111 declare void @llvm.aarch64.neon.st2.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
112 declare void @llvm.aarch64.neon.st3.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
113 declare void @llvm.aarch64.neon.st4.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>, <8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
114
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
115 ; Load 2 x v4f16 with de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
116 define { <4 x half>, <4 x half> } @load_interleave_64_2(<4 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
117 ; CHECK-LABEL: load_interleave_64_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
118 ; CHECK: ld2 { v0.4h, v1.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
119 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
120 %0 = tail call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
121 ret { <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
122 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
123
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
124 ; Load 3 x v4f16 with de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
125 define { <4 x half>, <4 x half>, <4 x half> } @load_interleave_64_3(<4 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
126 ; CHECK-LABEL: load_interleave_64_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
127 ; CHECK: ld3 { v0.4h, v1.4h, v2.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
128 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129 %0 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 ret { <4 x half>, <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
131 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
132
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
133 ; Load 4 x v4f16 with de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
134 define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @load_interleave_64_4(<4 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
135 ; CHECK-LABEL: load_interleave_64_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
136 ; CHECK: ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
137 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
138 %0 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
139 ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
141
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
142 ; Store 2 x v4f16 with interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
143 define void @store_interleave_64_2(<4 x half>* %a, <4 x half> %b, <4 x half> %c) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
144 ; CHECK-LABEL: store_interleave_64_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
145 ; CHECK: st2 { v0.4h, v1.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
146 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
147 tail call void @llvm.aarch64.neon.st2.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
148 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
149 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
150
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
151 ; Store 3 x v4f16 with interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
152 define void @store_interleave_64_3(<4 x half>* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
153 ; CHECK-LABEL: store_interleave_64_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
154 ; CHECK: st3 { v0.4h, v1.4h, v2.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
155 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
156 tail call void @llvm.aarch64.neon.st3.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
157 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
158 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
159
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
160 ; Store 4 x v4f16 with interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
161 define void @store_interleave_64_4(<4 x half>* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
162 ; CHECK-LABEL: store_interleave_64_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
163 ; CHECK: st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
164 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
165 tail call void @llvm.aarch64.neon.st4.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e, <4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
166 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
167 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
168
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
169 ; Load 2 x v8f16 with de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
170 define { <8 x half>, <8 x half> } @load_interleave_128_2(<8 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
171 ; CHECK-LABEL: load_interleave_128_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
172 ; CHECK: ld2 { v0.8h, v1.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
173 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
174 %0 = tail call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
175 ret { <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
176 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
177
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
178 ; Load 3 x v8f16 with de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
179 define { <8 x half>, <8 x half>, <8 x half> } @load_interleave_128_3(<8 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
180 ; CHECK-LABEL: load_interleave_128_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
181 ; CHECK: ld3 { v0.8h, v1.8h, v2.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
182 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
183 %0 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
184 ret { <8 x half>, <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
185 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
186
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
187 ; Load 8 x v8f16 with de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
188 define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @load_interleave_128_4(<8 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
189 ; CHECK-LABEL: load_interleave_128_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
190 ; CHECK: ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
191 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
192 %0 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
193 ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
194 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
195
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
196 ; Store 2 x v8f16 with interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
197 define void @store_interleave_128_2(<8 x half>* %a, <8 x half> %b, <8 x half> %c) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
198 ; CHECK-LABEL: store_interleave_128_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
199 ; CHECK: st2 { v0.8h, v1.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
200 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
201 tail call void @llvm.aarch64.neon.st2.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
202 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
203 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
204
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
205 ; Store 3 x v8f16 with interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
206 define void @store_interleave_128_3(<8 x half>* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
207 ; CHECK-LABEL: store_interleave_128_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
208 ; CHECK: st3 { v0.8h, v1.8h, v2.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
209 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
210 tail call void @llvm.aarch64.neon.st3.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
211 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
212 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
213
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
214 ; Store 8 x v8f16 with interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
215 define void @store_interleave_128_4(<8 x half>* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
216 ; CHECK-LABEL: store_interleave_128_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
217 ; CHECK: st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
218 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
219 tail call void @llvm.aarch64.neon.st4.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e, <8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
220 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
221 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
222
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
223 ; NEON intrinsics - duplicating loads
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
224 declare { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2r.v4f16.p0f16(half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
225 declare { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3r.v4f16.p0f16(half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
226 declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4r.v4f16.p0f16(half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
227 declare { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2r.v8f16.p0f16(half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
228 declare { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3r.v8f16.p0f16(half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
229 declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4r.v8f16.p0f16(half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
230
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
231 ; Load 2 x v4f16 with duplication
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
232 define { <4 x half>, <4 x half> } @load_dup_64_2(half* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
233 ; CHECK-LABEL: load_dup_64_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
234 ; CHECK: ld2r { v0.4h, v1.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
235 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
236 %0 = tail call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2r.v4f16.p0f16(half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
237 ret { <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
238 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
239
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
240 ; Load 3 x v4f16 with duplication
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
241 define { <4 x half>, <4 x half>, <4 x half> } @load_dup_64_3(half* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
242 ; CHECK-LABEL: load_dup_64_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
243 ; CHECK: ld3r { v0.4h, v1.4h, v2.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
244 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
245 %0 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3r.v4f16.p0f16(half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
246 ret { <4 x half>, <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
247 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
248
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
249 ; Load 4 x v4f16 with duplication
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
250 define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @load_dup_64_4(half* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
251 ; CHECK-LABEL: load_dup_64_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
252 ; CHECK: ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
253 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
254 %0 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4r.v4f16.p0f16(half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
255 ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
256 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
257
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
258 ; Load 2 x v8f16 with duplication
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
259 define { <8 x half>, <8 x half> } @load_dup_128_2(half* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
260 ; CHECK-LABEL: load_dup_128_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
261 ; CHECK: ld2r { v0.8h, v1.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
262 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
263 %0 = tail call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2r.v8f16.p0f16(half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
264 ret { <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
265 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
266
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
267 ; Load 3 x v8f16 with duplication
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
268 define { <8 x half>, <8 x half>, <8 x half> } @load_dup_128_3(half* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
269 ; CHECK-LABEL: load_dup_128_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
270 ; CHECK: ld3r { v0.8h, v1.8h, v2.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
271 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
272 %0 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3r.v8f16.p0f16(half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
273 ret { <8 x half>, <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
274 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
275
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
276 ; Load 8 x v8f16 with duplication
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
277 define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @load_dup_128_4(half* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
278 ; CHECK-LABEL: load_dup_128_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
279 ; CHECK: ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
280 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
281 %0 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4r.v8f16.p0f16(half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
282 ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
283 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
284
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
285
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
286 ; NEON intrinsics - loads and stores to/from one lane
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
287 declare { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2lane.v4f16.p0f16(<4 x half>, <4 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
288 declare { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3lane.v4f16.p0f16(<4 x half>, <4 x half>, <4 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
289 declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4lane.v4f16.p0f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
290 declare void @llvm.aarch64.neon.st2lane.v4f16.p0f16(<4 x half>, <4 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
291 declare void @llvm.aarch64.neon.st3lane.v4f16.p0f16(<4 x half>, <4 x half>, <4 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
292 declare void @llvm.aarch64.neon.st4lane.v4f16.p0f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
293 declare { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2lane.v8f16.p0f16(<8 x half>, <8 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
294 declare { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3lane.v8f16.p0f16(<8 x half>, <8 x half>, <8 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
295 declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4lane.v8f16.p0f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
296 declare void @llvm.aarch64.neon.st2lane.v8f16.p0f16(<8 x half>, <8 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
297 declare void @llvm.aarch64.neon.st3lane.v8f16.p0f16(<8 x half>, <8 x half>, <8 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
298 declare void @llvm.aarch64.neon.st4lane.v8f16.p0f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>, i64, half*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
299
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
300 ; Load one lane of 2 x v4f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
301 define { <4 x half>, <4 x half> } @load_lane_64_2(half* %a, <4 x half> %b, <4 x half> %c) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
302 ; CHECK-LABEL: load_lane_64_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
303 ; CHECK: ld2 { v0.h, v1.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
304 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
305 %0 = tail call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
306 ret { <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
307 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
308
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
309 ; Load one lane of 3 x v4f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
310 define { <4 x half>, <4 x half>, <4 x half> } @load_lane_64_3(half* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
311 ; CHECK-LABEL: load_lane_64_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
312 ; CHECK: ld3 { v0.h, v1.h, v2.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
313 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
314 %0 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
315 ret { <4 x half>, <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
316 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
317
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
318 ; Load one lane of 4 x v4f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
319 define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @load_lane_64_4(half* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
320 ; CHECK-LABEL: load_lane_64_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
321 ; CHECK: ld4 { v0.h, v1.h, v2.h, v3.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
322 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
323 %0 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
324 ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
325 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
326
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
327 ; Store one lane of 2 x v4f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
328 define void @store_lane_64_2(half* %a, <4 x half> %b, <4 x half> %c) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
329 ; CHECK-LABEL: store_lane_64_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
330 ; CHECK: st2 { v0.h, v1.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
331 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
332 tail call void @llvm.aarch64.neon.st2lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
333 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
334 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
335
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
336 ; Store one lane of 3 x v4f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
337 define void @store_lane_64_3(half* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
338 ; CHECK-LABEL: store_lane_64_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
339 ; CHECK: st3 { v0.h, v1.h, v2.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
340 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
341 tail call void @llvm.aarch64.neon.st3lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
342 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
343 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
344
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
345 ; Store one lane of 4 x v4f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
346 define void @store_lane_64_4(half* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
347 ; CHECK-LABEL: store_lane_64_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
348 ; CHECK: st4 { v0.h, v1.h, v2.h, v3.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
349 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
350 tail call void @llvm.aarch64.neon.st4lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
351 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
352 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
353
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
354 ; Load one lane of 2 x v8f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
355 define { <8 x half>, <8 x half> } @load_lane_128_2(half* %a, <8 x half> %b, <8 x half> %c) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
356 ; CHECK-LABEL: load_lane_128_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
357 ; CHECK: ld2 { v0.h, v1.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
358 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
359 %0 = tail call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
360 ret { <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
361 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
362
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
363 ; Load one lane of 3 x v8f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
364 define { <8 x half>, <8 x half>, <8 x half> } @load_lane_128_3(half* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
365 ; CHECK-LABEL: load_lane_128_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
366 ; CHECK: ld3 { v0.h, v1.h, v2.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
367 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
368 %0 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
369 ret { <8 x half>, <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
370 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
371
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
372 ; Load one lane of 8 x v8f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
373 define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @load_lane_128_4(half* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
374 ; CHECK-LABEL: load_lane_128_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
375 ; CHECK: ld4 { v0.h, v1.h, v2.h, v3.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
376 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
377 %0 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
378 ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
379 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
380
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
381 ; Store one lane of 2 x v8f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
382 define void @store_lane_128_2(half* %a, <8 x half> %b, <8 x half> %c) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
383 ; CHECK-LABEL: store_lane_128_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
384 ; CHECK: st2 { v0.h, v1.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
385 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
386 tail call void @llvm.aarch64.neon.st2lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
387 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
388 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
389
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
390 ; Store one lane of 3 x v8f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
391 define void @store_lane_128_3(half* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
392 ; CHECK-LABEL: store_lane_128_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
393 ; CHECK: st3 { v0.h, v1.h, v2.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
394 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
395 tail call void @llvm.aarch64.neon.st3lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
396 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
397 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
398
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
399 ; Store one lane of 8 x v8f16
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
400 define void @store_lane_128_4(half* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
401 ; CHECK-LABEL: store_lane_128_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
402 ; CHECK: st4 { v0.h, v1.h, v2.h, v3.h }[2], [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
403 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
404 tail call void @llvm.aarch64.neon.st4lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e, i64 2, half* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
405 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
406 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
407
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
408 ; NEON intrinsics - load/store without interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
409 declare { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x2.v4f16.p0v4f16(<4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
410 declare { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x3.v4f16.p0v4f16(<4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
411 declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x4.v4f16.p0v4f16(<4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
412 declare void @llvm.aarch64.neon.st1x2.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
413 declare void @llvm.aarch64.neon.st1x3.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
414 declare void @llvm.aarch64.neon.st1x4.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>, <4 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
415 declare { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x2.v8f16.p0v8f16(<8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
416 declare { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x3.v8f16.p0v8f16(<8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
417 declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x4.v8f16.p0v8f16(<8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
418 declare void @llvm.aarch64.neon.st1x2.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
419 declare void @llvm.aarch64.neon.st1x3.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
420 declare void @llvm.aarch64.neon.st1x4.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>, <8 x half>*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
421
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
422 ; Load 2 x v4f16 without de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
423 define { <4 x half>, <4 x half> } @load_64_2(<4 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
424 ; CHECK-LABEL: load_64_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
425 ; CHECK: ld1 { v0.4h, v1.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
426 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
427 %0 = tail call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x2.v4f16.p0v4f16(<4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
428 ret { <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
429 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
430
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
431 ; Load 3 x v4f16 without de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
432 define { <4 x half>, <4 x half>, <4 x half> } @load_64_3(<4 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
433 ; CHECK-LABEL: load_64_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
434 ; CHECK: ld1 { v0.4h, v1.4h, v2.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
435 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
436 %0 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x3.v4f16.p0v4f16(<4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
437 ret { <4 x half>, <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
438 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
439
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
440 ; Load 4 x v4f16 without de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
441 define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @load_64_4(<4 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
442 ; CHECK-LABEL: load_64_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
443 ; CHECK: ld1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
444 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
445 %0 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x4.v4f16.p0v4f16(<4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
446 ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
447 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
448
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
449 ; Store 2 x v4f16 without interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
450 define void @store_64_2(<4 x half>* %a, <4 x half> %b, <4 x half> %c) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
451 ; CHECK-LABEL: store_64_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
452 ; CHECK: st1 { v0.4h, v1.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
453 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
454 tail call void @llvm.aarch64.neon.st1x2.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
455 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
456 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
457
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
458 ; Store 3 x v4f16 without interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
459 define void @store_64_3(<4 x half>* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
460 ; CHECK-LABEL: store_64_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
461 ; CHECK: st1 { v0.4h, v1.4h, v2.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
462 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
463 tail call void @llvm.aarch64.neon.st1x3.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
464 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
465 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
466
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
467 ; Store 4 x v4f16 without interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
468 define void @store_64_4(<4 x half>* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
469 ; CHECK-LABEL: store_64_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
470 ; CHECK: st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
471 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
472 tail call void @llvm.aarch64.neon.st1x4.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e, <4 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
473 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
474 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
475
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
476 ; Load 2 x v8f16 without de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
477 define { <8 x half>, <8 x half> } @load_128_2(<8 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
478 ; CHECK-LABEL: load_128_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
479 ; CHECK: ld1 { v0.8h, v1.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
480 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
481 %0 = tail call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x2.v8f16.p0v8f16(<8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
482 ret { <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
483 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
484
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
485 ; Load 3 x v8f16 without de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
486 define { <8 x half>, <8 x half>, <8 x half> } @load_128_3(<8 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
487 ; CHECK-LABEL: load_128_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
488 ; CHECK: ld1 { v0.8h, v1.8h, v2.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
489 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
490 %0 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x3.v8f16.p0v8f16(<8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
491 ret { <8 x half>, <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
492 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
493
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
494 ; Load 8 x v8f16 without de-interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
495 define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @load_128_4(<8 x half>* %a) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
496 ; CHECK-LABEL: load_128_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
497 ; CHECK: ld1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
498 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
499 %0 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x4.v8f16.p0v8f16(<8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
500 ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %0
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
501 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
502
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
503 ; Store 2 x v8f16 without interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
504 define void @store_128_2(<8 x half>* %a, <8 x half> %b, <8 x half> %c) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
505 ; CHECK-LABEL: store_128_2:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
506 ; CHECK: st1 { v0.8h, v1.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
507 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
508 tail call void @llvm.aarch64.neon.st1x2.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
509 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
510 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
511
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
512 ; Store 3 x v8f16 without interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
513 define void @store_128_3(<8 x half>* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
514 ; CHECK-LABEL: store_128_3:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
515 ; CHECK: st1 { v0.8h, v1.8h, v2.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
516 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
517 tail call void @llvm.aarch64.neon.st1x3.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
518 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
519 }
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
520
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
521 ; Store 8 x v8f16 without interleaving
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
522 define void @store_128_4(<8 x half>* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e) #0 {
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
523 ; CHECK-LABEL: store_128_4:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
524 ; CHECK: st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
525 entry:
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
526 tail call void @llvm.aarch64.neon.st1x4.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e, <8 x half>* %a)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
527 ret void
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
528 }
|