0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 ; RUN: opt < %s -instcombine -S | FileCheck %s
|
77
|
2 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
3
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Builds <f, 0, 0, 0> and runs it through a chain of scalar SSE intrinsics
; before converting lane 0 to an integer. The checks below require an fmul to
; appear, and the zero-lane insertelements plus the sub.ss/mul.ss intrinsic
; calls to be gone from the output.
define i16 @test1(float %f) {
entry:
; CHECK-LABEL: @test1(
; CHECK: fmul float
; CHECK-NOT: insertelement {{.*}} 0.00
; CHECK-NOT: call {{.*}} @llvm.x86.sse.mul
; CHECK-NOT: call {{.*}} @llvm.x86.sse.sub
; CHECK: ret
  %lane0 = insertelement <4 x float> undef, float %f, i32 0
  %lane1 = insertelement <4 x float> %lane0, float 0.000000e+00, i32 1
  %lane2 = insertelement <4 x float> %lane1, float 0.000000e+00, i32 2
  %vec = insertelement <4 x float> %lane2, float 0.000000e+00, i32 3
  %diff = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %vec, <4 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>)
  %scaled = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %diff, <4 x float> <float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>)
  %capped = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %scaled, <4 x float> <float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>)
  %clamped = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %capped, <4 x float> zeroinitializer)
  %as.i32 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %clamped)
  %as.i16 = trunc i32 %as.i32 to i16
  ret i16 %as.i16
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Inserts f*f into lane 0 of a zero-filled vector, bitcasts the vector to
; <4 x i32> and reads lane 0 back. The checks require that no insertelement
; or extractelement is left in the output.
define i32 @test2(float %f) {
; CHECK-LABEL: @test2(
; CHECK-NOT: insertelement
; CHECK-NOT: extractelement
; CHECK: ret
  %sq = fmul float %f, %f
  %v0 = insertelement <4 x float> undef, float %sq, i32 0
  %v1 = insertelement <4 x float> %v0, float 0.000000e+00, i32 1
  %v2 = insertelement <4 x float> %v1, float 0.000000e+00, i32 2
  %v3 = insertelement <4 x float> %v2, float 0.000000e+00, i32 3
  %as.ints = bitcast <4 x float> %v3 to <4 x i32>
  %bits = extractelement <4 x i32> %as.ints, i32 0
  ret i32 %bits
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
39
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Feeds zero-padded vectors built from %f and %d into each of the eight
; scalar float/double -> integer conversion intrinsics and sums the results.
; The checks require that none of the zero-lane insertelements remain.
define i64 @test3(float %f, double %d) {
; CHECK-LABEL: @test3(
; CHECK-NOT: insertelement {{.*}} 0.00
; CHECK: ret
entry:
  ; cvtss2si
  %ss32.0 = insertelement <4 x float> undef, float %f, i32 0
  %ss32.1 = insertelement <4 x float> %ss32.0, float 0.000000e+00, i32 1
  %ss32.2 = insertelement <4 x float> %ss32.1, float 0.000000e+00, i32 2
  %ss32.3 = insertelement <4 x float> %ss32.2, float 0.000000e+00, i32 3
  %r.ss32 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %ss32.3)
  ; cvtss2si64
  %ss64.0 = insertelement <4 x float> undef, float %f, i32 0
  %ss64.1 = insertelement <4 x float> %ss64.0, float 0.000000e+00, i32 1
  %ss64.2 = insertelement <4 x float> %ss64.1, float 0.000000e+00, i32 2
  %ss64.3 = insertelement <4 x float> %ss64.2, float 0.000000e+00, i32 3
  %r.ss64 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %ss64.3)
  ; cvttss2si
  %tss32.0 = insertelement <4 x float> undef, float %f, i32 0
  %tss32.1 = insertelement <4 x float> %tss32.0, float 0.000000e+00, i32 1
  %tss32.2 = insertelement <4 x float> %tss32.1, float 0.000000e+00, i32 2
  %tss32.3 = insertelement <4 x float> %tss32.2, float 0.000000e+00, i32 3
  %r.tss32 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %tss32.3)
  ; cvttss2si64
  %tss64.0 = insertelement <4 x float> undef, float %f, i32 0
  %tss64.1 = insertelement <4 x float> %tss64.0, float 0.000000e+00, i32 1
  %tss64.2 = insertelement <4 x float> %tss64.1, float 0.000000e+00, i32 2
  %tss64.3 = insertelement <4 x float> %tss64.2, float 0.000000e+00, i32 3
  %r.tss64 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %tss64.3)
  ; cvtsd2si
  %sd32.0 = insertelement <2 x double> undef, double %d, i32 0
  %sd32.1 = insertelement <2 x double> %sd32.0, double 0.000000e+00, i32 1
  %r.sd32 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %sd32.1)
  ; cvtsd2si64
  %sd64.0 = insertelement <2 x double> undef, double %d, i32 0
  %sd64.1 = insertelement <2 x double> %sd64.0, double 0.000000e+00, i32 1
  %r.sd64 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %sd64.1)
  ; cvttsd2si
  %tsd32.0 = insertelement <2 x double> undef, double %d, i32 0
  %tsd32.1 = insertelement <2 x double> %tsd32.0, double 0.000000e+00, i32 1
  %r.tsd32 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %tsd32.1)
  ; cvttsd2si64
  %tsd64.0 = insertelement <2 x double> undef, double %d, i32 0
  %tsd64.1 = insertelement <2 x double> %tsd64.0, double 0.000000e+00, i32 1
  %r.tsd64 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %tsd64.1)
  ; Combine all eight results so that every conversion is used.
  %sum.f32 = add i32 %r.ss32, %r.tss32
  %sum.d32 = add i32 %r.sd32, %r.tsd32
  %sum32 = add i32 %sum.f32, %sum.d32
  %sum32.ext = sext i32 %sum32 to i64
  %sum.f64 = add i64 %r.ss64, %r.tss64
  %sum.d64 = add i64 %r.sd64, %r.tsd64
  %sum64 = add i64 %sum.f64, %sum.d64
  %total = add i64 %sum32.ext, %sum64
  ret i64 %total
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
87
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Inserts a byte into lane 1 of a zeroinitializer vector and then extracts
; lane 0 for a comparison. The checks require that the extractelement is
; gone from the output.
define void @get_image() nounwind {
; CHECK-LABEL: @get_image(
; CHECK-NOT: extractelement
; CHECK: unreachable
entry:
  %ch = call i32 @fgetc(i8* null) nounwind
  %byte = trunc i32 %ch to i8
  %vec = insertelement <100 x i8> zeroinitializer, i8 %byte, i32 1
  %elt0 = extractelement <100 x i8> %vec, i32 0
  %is80 = icmp eq i8 %elt0, 80
  br i1 %is80, label %taken, label %done

taken:                                            ; preds = %entry
  br label %done

done:                                             ; preds = %taken, %entry
  unreachable
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
106
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; PR4340
; Every lane of the loaded vector is overwritten with 0.0 before the store,
; so the checks require that the load is gone from the output.
define void @vac(<4 x float>* nocapture %a) nounwind {
; CHECK-LABEL: @vac(
; CHECK-NOT: load
; CHECK: ret
entry:
  %old = load <4 x float>* %a
  %z0 = insertelement <4 x float> %old, float 0.000000e+00, i32 0
  %z1 = insertelement <4 x float> %z0, float 0.000000e+00, i32 1
  %z2 = insertelement <4 x float> %z1, float 0.000000e+00, i32 2
  %z3 = insertelement <4 x float> %z2, float 0.000000e+00, i32 3
  store <4 x float> %z3, <4 x float>* %a
  ret void
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
121
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
122 declare i32 @fgetc(i8*)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
123
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
124 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
125
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
126 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
127
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
128 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
130 declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
131
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
132 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
133 declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
134 declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
135 declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
136 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
137 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
138 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
139 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; <rdar://problem/6945110>
; The widened %tmp2 shuffle must survive, while the blend that only changes
; the upper half of the pmovzxwd input must be removed. The label line anchors
; these checks to this function.
define <4 x i32> @kernel3_vertical(<4 x i16> * %src, <8 x i16> * %foo) nounwind {
entry:
; CHECK-LABEL: @kernel3_vertical(
  %tmp = load <4 x i16>* %src
  %tmp1 = load <8 x i16>* %foo
; CHECK: %tmp2 = shufflevector
  %tmp2 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; pmovzxwd ignores the upper 64-bits of its input; -instcombine should remove this shuffle:
; CHECK-NOT: shufflevector
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: pmovzxwd
  %0 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
  ret <4 x i32> %0
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
155 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
156
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; The second shuffle only reads lanes 0 and 1 of the widened %y (mask indices
; 4 and 5), so the checks expect the first shuffle to be narrowed to
; <0, 1, undef, undef> with an undef second operand.
define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
entry:
; CHECK-LABEL: define <4 x float> @dead_shuffle_elt(
; CHECK: shufflevector <2 x float> %y, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %y.wide = shufflevector <2 x float> %y, <2 x float> %y, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %blend = shufflevector <4 x float> %x, <4 x float> %y.wide, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x float> %blend
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
165
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Only lanes 0 and 1 of the truncated vector reach the return value (final
; shuffle mask <0, 1>), so the checks allow exactly two insertelements.
define <2 x float> @test_fptrunc(double %f) {
; CHECK-LABEL: @test_fptrunc(
; CHECK: insertelement
; CHECK: insertelement
; CHECK-NOT: insertelement
  %wide0 = insertelement <4 x double> undef, double %f, i32 0
  %wide1 = insertelement <4 x double> %wide0, double 0.000000e+00, i32 1
  %wide2 = insertelement <4 x double> %wide1, double 0.000000e+00, i32 2
  %wide3 = insertelement <4 x double> %wide2, double 0.000000e+00, i32 3
  %narrowed = fptrunc <4 x double> %wide3 to <4 x float>
  %low.pair = shufflevector <4 x float> %narrowed, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %low.pair
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
179
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; Only lanes 0 and 1 of the extended vector reach the return value (final
; shuffle mask <0, 1>), so the checks allow exactly two insertelements.
define <2 x double> @test_fpext(float %f) {
; CHECK-LABEL: @test_fpext(
; CHECK: insertelement
; CHECK: insertelement
; CHECK-NOT: insertelement
  %src0 = insertelement <4 x float> undef, float %f, i32 0
  %src1 = insertelement <4 x float> %src0, float 0.000000e+00, i32 1
  %src2 = insertelement <4 x float> %src1, float 0.000000e+00, i32 2
  %src3 = insertelement <4 x float> %src2, float 0.000000e+00, i32 3
  %widened = fpext <4 x float> %src3 to <4 x double>
  %low.pair = shufflevector <4 x double> %widened, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %low.pair
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
193
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
; The constant select mask <true, false, false, true> takes lanes 0 and 3 from
; the %a chain and lanes 1 and 2 from the %b chain. The checks expect the
; unused inserts to disappear and the %b operand to become a constant with
; undef in lanes 0 and 3. %a0, %a3 and %ret are named in the checks and must
; keep their names.
define <4 x float> @test_select(float %f, float %g) {
; CHECK-LABEL: @test_select(
; CHECK: %a0 = insertelement <4 x float> undef, float %f, i32 0
; CHECK-NOT: insertelement
; CHECK: %a3 = insertelement <4 x float> %a0, float 3.000000e+00, i32 3
; CHECK-NOT: insertelement
; CHECK: %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>
  ; True-side operand: <f, 1.0, 2.0, 3.0>
  %a0 = insertelement <4 x float> undef, float %f, i32 0
  %a.dead1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1
  %a.dead2 = insertelement <4 x float> %a.dead1, float 2.000000e+00, i32 2
  %a3 = insertelement <4 x float> %a.dead2, float 3.000000e+00, i32 3
  ; False-side operand: <g, 4.0, 5.0, 6.0>
  %rhs0 = insertelement <4 x float> undef, float %g, i32 0
  %rhs1 = insertelement <4 x float> %rhs0, float 4.000000e+00, i32 1
  %rhs2 = insertelement <4 x float> %rhs1, float 5.000000e+00, i32 2
  %rhs3 = insertelement <4 x float> %rhs2, float 6.000000e+00, i32 3
  %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> %rhs3
  ret <4 x float> %ret
}
|
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
212
|
77
|
; We should optimize these two redundant insertqi into one: both calls insert
; from %i with the same (i8 32, i8 32) operands, so the checks allow only a
; single insertqi call in the output.
; CHECK-LABEL: define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testInsertTwice(<2 x i64> %v, <2 x i64> %i) {
; CHECK: call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
; CHECK-NOT: insertqi
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 32)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 32)
  ret <2 x i64> %2
}
|
0
Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
222
|
77
|
; The result of this insert is the second arg, since the top 64 bits of
; the result are undefined, and we copy the bottom 64 bits from the
; second arg
; CHECK-LABEL: define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> %i
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
  ret <2 x i64> %1
}
|
|
232
|
|
; Test the different range relationships and orderings that can occur between
; two insertqi calls.
;
; Here the second insert (i8 16, i8 16) lies inside the range written by the
; first (i8 32, i8 0); the checks expect one insertqi with (i8 32, i8 0).
; CHECK-LABEL: define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testInsertContainedRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 16)
  ret <2 x i64> %2
}
|
|
242
|
|
; Contained ranges with the smaller insert (i8 16, i8 16) issued first; the
; checks still expect a single insertqi with (i8 32, i8 0).
; CHECK-LABEL: define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testInsertContainedRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 16)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
  ret <2 x i64> %2
}
|
|
251
|
|
; Overlapping ranges: (i8 32, i8 0) followed by (i8 32, i8 16). The checks
; expect them to be merged into one insertqi with (i8 48, i8 0).
; CHECK-LABEL: define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testInsertOverlappingRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 16)
  ret <2 x i64> %2
}
|
|
260
|
|
; Overlapping ranges in the opposite order: (i8 32, i8 16) followed by
; (i8 32, i8 0). The checks expect one insertqi with (i8 48, i8 0).
; CHECK-LABEL: define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testInsertOverlappingRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 16)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
  ret <2 x i64> %2
}
|
|
269
|
|
; Adjacent ranges: (i8 32, i8 0) followed by (i8 16, i8 32). The checks
; expect them to be merged into one insertqi with (i8 48, i8 0).
; CHECK-LABEL: define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testInsertAdjacentRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 32, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
  ret <2 x i64> %2
}
|
|
278
|
|
; Adjacent ranges in the opposite order: (i8 16, i8 32) followed by
; (i8 32, i8 0). The checks expect one insertqi with (i8 48, i8 0).
; CHECK-LABEL: define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testInsertAdjacentRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: %[[RES:.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 0)
; CHECK: ret <2 x i64> %[[RES]]
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 32, i8 0)
  ret <2 x i64> %2
}
|
|
287
|
|
; Disjoint ranges: (i8 16, i8 0) followed by (i8 16, i8 32). The checks
; expect both insertqi calls to remain unchanged.
; CHECK-LABEL: define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testInsertDisjointRange(<2 x i64> %v, <2 x i64> %i) {
; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 0)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 32)
  ret <2 x i64> %2
}
|
|
296
|
|
; Same disjoint ranges as @testInsertDisjointRange but inserted in the
; opposite order: (i8 16, i8 32) first, then (i8 16, i8 0). This body used to
; be an exact copy of the first variant, so the reversed order was never
; tested. Both insertqi calls are expected to remain unchanged.
; CHECK-LABEL: define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
; CHECK: tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 0)
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 16, i8 32)
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %i, i8 16, i8 0)
  ret <2 x i64> %2
}
|
|
305
|
83
|
; insertqi with operands (i8 0, i8 0): the checks expect the call to fold to
; the second vector operand %i.
; CHECK-LABEL: define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> %i
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
  ret <2 x i64> %1
}
|
|
312
|
|
; insertqi with operands (i8 0, i8 16): the checks expect the result to fold
; to undef.
; CHECK-LABEL: define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i)
define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
; CHECK: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
  ret <2 x i64> %1
}
|
|
319
|
|
; INSERTQI with immediates (48, 32): 48 + 32 exceeds 64, and the test expects
; the result to fold to undef.
; The label keeps the undef check from being satisfied by a sibling test.
define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i)
; CHECK: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
  ret <2 x i64> %1
}
|
|
326
|
|
; INSERTQI with immediates (64, 16). The test expects the result to fold to
; undef. (Presumably only the low six bits of each immediate are used, making
; 64 behave like 0 - confirm against the INSERTQ specification.)
; The label keeps the undef check from being satisfied by a sibling test.
define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i)
; CHECK: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
  ret <2 x i64> %1
}
|
77
|
333
|
|
; SSE4a INSERTQI intrinsic used by the insertqi tests in this file. The
; declaration itself is expected to still be present in the output.
; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
|
|
336
|
|
; 128-bit VPERMILPS with an all-constant selector <3, 2, 1, 0>: expected to be
; rewritten as a generic shufflevector carrying the same lane indices.
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)

define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
; CHECK-LABEL: @test_vpermilvar_ps(
; CHECK: shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %reversed = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
  ret <4 x float> %reversed
}
|
|
344
|
|
; 256-bit VPERMILPS with the constant selector <7, 6, 5, 4, 3, 2, 1, 0>.
; The expected shuffle mask is <3, 2, 1, 0, 7, 6, 5, 4>: each 128-bit half is
; reversed within itself, i.e. the selector values are not used as indices
; across the whole vector.
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)

define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
; CHECK-LABEL: @test_vpermilvar_ps_256(
; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %reversed = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %reversed
}
|
|
352
|
|
; 128-bit VPERMILPD with the constant selector <2, 0>. The expected shuffle
; mask is <1, 0>, i.e. a selector value of 2 picks element 1 (presumably only
; bit 1 of each selector element is significant - confirm against the
; VPERMILPD specification).
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i32>)

define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
; CHECK-LABEL: @test_vpermilvar_pd(
; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
  %swapped = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> <i32 2, i32 0>)
  ret <2 x double> %swapped
}
|
|
360
|
|
; 256-bit VPERMILPD with the constant selector <3, 1, 2, 0>. The expected
; shuffle mask is <1, 0, 3, 2>: the two elements of each 128-bit half are
; swapped within that half.
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i32>)

define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
; CHECK-LABEL: @test_vpermilvar_pd_256(
; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %swapped = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> <i32 3, i32 1, i32 2, i32 0>)
  ret <4 x double> %swapped
}
|
|
368
|
|
; 128-bit VPERMILPS with an all-zero selector: expected to become a
; shufflevector whose mask is zeroinitializer (element 0 in every lane).
define <4 x float> @test_vpermilvar_ps_zero(<4 x float> %v) {
; CHECK-LABEL: @test_vpermilvar_ps_zero(
; CHECK: shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
  %splat = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
  ret <4 x float> %splat
}
|
|
375
|
|
; 256-bit VPERMILPS with an all-zero selector. The expected mask is
; <0, 0, 0, 0, 4, 4, 4, 4>: each 128-bit half repeats its own first element
; rather than element 0 of the whole vector.
define <8 x float> @test_vpermilvar_ps_256_zero(<8 x float> %v) {
; CHECK-LABEL: @test_vpermilvar_ps_256_zero(
; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
  %splat = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
  ret <8 x float> %splat
}
|
|
382
|
|
; 128-bit VPERMILPD with an all-zero selector: expected to become a
; shufflevector whose mask is zeroinitializer (element 0 in both lanes).
define <2 x double> @test_vpermilvar_pd_zero(<2 x double> %v) {
; CHECK-LABEL: @test_vpermilvar_pd_zero(
; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  %splat = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i32> zeroinitializer)
  ret <2 x double> %splat
}
|
|
389
|
|
; 256-bit VPERMILPD with an all-zero selector. The expected mask is
; <0, 0, 2, 2>: each 128-bit half repeats its own first element.
define <4 x double> @test_vpermilvar_pd_256_zero(<4 x double> %v) {
; CHECK-LABEL: @test_vpermilvar_pd_256_zero(
; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %splat = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i32> zeroinitializer)
  ret <4 x double> %splat
}
|
|
396
|
|
; Constant-folds a chain of six SSE2 left shifts, each by a count of 1:
; psll.w / psll.d / psll.q take the count from the vector %3 (1 in element 0,
; 0 in element 1), pslli.w / pslli.d / pslli.q take it from the scalar %S.
; %S is the constant 1 routed through a no-op bitcast so it has a name.
; Starting from i16 lanes 1..8, every lane ends up shifted left by 6 bits
; (64, 128, ..., 512); the expected return value is that vector viewed as
; two i64 elements.
; The label anchors the ret check to this function.
define <2 x i64> @test_sse2_1() nounwind readnone uwtable {
; CHECK-LABEL: @test_sse2_1(
; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
|
|
419
|
|
; AVX2 counterpart of the SSE2 shift-by-1 chain: six left shifts, each by a
; count of 1, on 256-bit data. The vector-count forms (psll.w/d/q) still take
; a 128-bit count operand (%3 and its bitcasts); the immediate forms use %S.
; The input spells the four 64-bit values 1, 2, 3, 4 as i16 lanes, so the
; expected folded result is each value shifted left by 6: 64, 128, 192, 256.
; The label anchors the ret check to this function.
define <4 x i64> @test_avx2_1() nounwind readnone uwtable {
; CHECK-LABEL: @test_avx2_1(
; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
|
|
442
|
|
; Same SSE2 left-shift chain as the shift-by-1 test, but with a count of 128,
; which is larger than every element width involved (16, 32 and 64 bits).
; The whole chain is expected to fold to an all-zero vector.
; The label anchors the ret check to this function, so a zeroinitializer
; return in a later test cannot satisfy it.
define <2 x i64> @test_sse2_0() nounwind readnone uwtable {
; CHECK-LABEL: @test_sse2_0(
; CHECK: ret <2 x i64> zeroinitializer
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
|
|
465
|
|
; AVX2 left-shift chain with a count of 128, which is larger than every
; element width involved (16, 32 and 64 bits). The whole chain is expected to
; fold to an all-zero 256-bit vector.
; The label anchors the ret check to this function, so a zeroinitializer
; return in a later test cannot satisfy it.
define <4 x i64> @test_avx2_0() nounwind readnone uwtable {
; CHECK-LABEL: @test_avx2_0(
; CHECK: ret <4 x i64> zeroinitializer
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
|
|
; Constant-folds a chain of six SSE2 logical right shifts, each by a count of
; 1: psrl.w / psrl.d / psrl.q take the count from the vector %3, and
; psrli.w / psrli.d / psrli.q take it from the scalar %S.
; Because the d and q steps shift across i16 lane boundaries, the expected
; constant is not simply every i16 lane shifted right by 6.
; The label anchors the ret check to this function.
define <2 x i64> @test_sse2_psrl_1() nounwind readnone uwtable {
; CHECK-LABEL: @test_sse2_psrl_1(
; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
|
|
510
|
|
; AVX2 counterpart of the SSE2 right-shift-by-1 chain: six logical right
; shifts, each by a count of 1, on 256-bit data. The vector-count forms
; (psrl.w/d/q) take a 128-bit count operand; the immediate forms use %S.
; The input spells the four 64-bit values 1024, 2048, 4096, 8192 as i16
; lanes, so the expected folded result is each value shifted right by 6:
; 16, 32, 64, 128.
; The label anchors the ret check to this function.
define <4 x i64> @test_avx2_psrl_1() nounwind readnone uwtable {
; CHECK-LABEL: @test_avx2_psrl_1(
; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
  %S = bitcast i32 1 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
|
|
533
|
|
; SSE2 logical right-shift chain with a count of 128, which is larger than
; every element width involved (16, 32 and 64 bits). The whole chain is
; expected to fold to an all-zero vector.
; The label anchors the ret check to this function, so a zeroinitializer
; return in another test cannot satisfy it.
define <2 x i64> @test_sse2_psrl_0() nounwind readnone uwtable {
; CHECK-LABEL: @test_sse2_psrl_0(
; CHECK: ret <2 x i64> zeroinitializer
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
  %6 = bitcast <8 x i16> %5 to <4 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
  %9 = bitcast <4 x i32> %8 to <2 x i64>
  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
  %11 = bitcast <2 x i64> %10 to <8 x i16>
  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
  %13 = bitcast <8 x i16> %12 to <4 x i32>
  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
  %15 = bitcast <4 x i32> %14 to <2 x i64>
  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
  ret <2 x i64> %16
}
|
|
556
|
|
; AVX2 logical right-shift chain with a count of 128, which is larger than
; every element width involved (16, 32 and 64 bits). The whole chain is
; expected to fold to an all-zero 256-bit vector.
; The label anchors the ret check to this function, so a zeroinitializer
; return in another test cannot satisfy it.
define <4 x i64> @test_avx2_psrl_0() nounwind readnone uwtable {
; CHECK-LABEL: @test_avx2_psrl_0(
; CHECK: ret <4 x i64> zeroinitializer
  %S = bitcast i32 128 to i32
  %1 = zext i32 %S to i64
  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
  %3 = insertelement <2 x i64> %2, i64 0, i32 1
  %4 = bitcast <2 x i64> %3 to <8 x i16>
  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
  %6 = bitcast <16 x i16> %5 to <8 x i32>
  %7 = bitcast <2 x i64> %3 to <4 x i32>
  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
  %9 = bitcast <8 x i32> %8 to <4 x i64>
  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
  %11 = bitcast <4 x i64> %10 to <16 x i16>
  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
  %13 = bitcast <16 x i16> %12 to <8 x i32>
  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
  %15 = bitcast <8 x i32> %14 to <4 x i64>
  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
  ret <4 x i64> %16
}
|
|
579
|
|
; Declarations of the x86 shift intrinsics exercised by the shift-folding
; tests above. The "i" variants take the count as a scalar i32; the others
; take it as a 128-bit vector operand. All share attribute group #1.

; AVX2, shift left.
declare <4 x i64>  @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
declare <8 x i32>  @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
declare <4 x i64>  @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32>  @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1

; SSE2, shift left.
declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1

; AVX2, logical shift right.
declare <4 x i64>  @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
declare <8 x i32>  @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
declare <4 x i64>  @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
declare <8 x i32>  @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1

; SSE2, logical shift right.
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1

; Attribute group referenced by every declaration above.
attributes #1 = { nounwind readnone }
|