Mercurial > hg > CbC > CbC_llvm
comparison test/CodeGen/X86/avx512bw-intrinsics.ll @ 95:afa8332a0e37 LLVM3.8
LLVM 3.8
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Oct 2015 17:48:58 +0900 |
parents | 60c9769439b8 |
children | 7d135dc70f03 |
comparison
equal
deleted
inserted
replaced
84:f3e34b893a5f | 95:afa8332a0e37 |
---|---|
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding| FileCheck %s | 1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw --show-mc-encoding| FileCheck %s |
2 | 2 |
3 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) { | 3 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) { |
4 ; CHECK-LABEL: test_pcmpeq_b | 4 ; CHECK-LABEL: test_pcmpeq_b |
5 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ## | 5 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ## |
6 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) | 6 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) |
65 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32) | 65 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32) |
66 | 66 |
67 define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { | 67 define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { |
68 ; CHECK_LABEL: test_cmp_b_512 | 68 ; CHECK_LABEL: test_cmp_b_512 |
69 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ## | 69 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ## |
70 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 -1) | 70 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) |
71 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 | 71 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 |
72 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ## | 72 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ## |
73 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 -1) | 73 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) |
74 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 | 74 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 |
75 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ## | 75 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ## |
76 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 -1) | 76 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) |
77 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 | 77 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 |
78 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ## | 78 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ## |
79 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 -1) | 79 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) |
80 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 | 80 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 |
81 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ## | 81 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ## |
82 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 -1) | 82 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) |
83 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 | 83 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 |
84 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ## | 84 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ## |
85 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 -1) | 85 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) |
86 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 | 86 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 |
87 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ## | 87 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ## |
88 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 -1) | 88 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) |
89 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 | 89 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 |
90 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ## | 90 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ## |
91 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 -1) | 91 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) |
92 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 | 92 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 |
93 ret <8 x i64> %vec7 | 93 ret <8 x i64> %vec7 |
94 } | 94 } |
95 | 95 |
96 define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { | 96 define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { |
97 ; CHECK_LABEL: test_mask_cmp_b_512 | 97 ; CHECK_LABEL: test_mask_cmp_b_512 |
98 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ## | 98 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ## |
99 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 %mask) | 99 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) |
100 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 | 100 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 |
101 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ## | 101 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ## |
102 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 %mask) | 102 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) |
103 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 | 103 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 |
104 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ## | 104 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ## |
105 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 %mask) | 105 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) |
106 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 | 106 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 |
107 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ## | 107 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ## |
108 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 %mask) | 108 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) |
109 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 | 109 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 |
110 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ## | 110 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ## |
111 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 %mask) | 111 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask) |
112 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 | 112 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 |
113 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ## | 113 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ## |
114 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 %mask) | 114 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask) |
115 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 | 115 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 |
116 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ## | 116 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ## |
117 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 %mask) | 117 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask) |
118 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 | 118 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 |
119 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ## | 119 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ## |
120 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 %mask) | 120 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask) |
121 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 | 121 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 |
122 ret <8 x i64> %vec7 | 122 ret <8 x i64> %vec7 |
123 } | 123 } |
124 | 124 |
125 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i8, i64) nounwind readnone | 125 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone |
126 | 126 |
127 define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { | 127 define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { |
128 ; CHECK_LABEL: test_ucmp_b_512 | 128 ; CHECK_LABEL: test_ucmp_b_512 |
129 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ## | 129 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ## |
130 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 -1) | 130 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1) |
131 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 | 131 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 |
132 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ## | 132 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ## |
133 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 -1) | 133 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1) |
134 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 | 134 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 |
135 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ## | 135 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ## |
136 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 -1) | 136 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1) |
137 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 | 137 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 |
138 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ## | 138 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ## |
139 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 -1) | 139 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1) |
140 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 | 140 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 |
141 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ## | 141 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ## |
142 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 -1) | 142 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1) |
143 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 | 143 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 |
144 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ## | 144 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ## |
145 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 -1) | 145 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1) |
146 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 | 146 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 |
147 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ## | 147 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ## |
148 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 -1) | 148 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1) |
149 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 | 149 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 |
150 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ## | 150 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ## |
151 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 -1) | 151 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1) |
152 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 | 152 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 |
153 ret <8 x i64> %vec7 | 153 ret <8 x i64> %vec7 |
154 } | 154 } |
155 | 155 |
156 define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { | 156 define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { |
157 ; CHECK_LABEL: test_mask_ucmp_b_512 | 157 ; CHECK_LABEL: test_mask_ucmp_b_512 |
158 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ## | 158 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ## |
159 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 %mask) | 159 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask) |
160 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 | 160 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 |
161 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ## | 161 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ## |
162 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 %mask) | 162 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask) |
163 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 | 163 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 |
164 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ## | 164 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ## |
165 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 %mask) | 165 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask) |
166 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 | 166 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 |
167 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ## | 167 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ## |
168 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 %mask) | 168 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask) |
169 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 | 169 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 |
170 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ## | 170 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ## |
171 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 %mask) | 171 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask) |
172 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 | 172 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 |
173 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ## | 173 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ## |
174 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 %mask) | 174 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask) |
175 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 | 175 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 |
176 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ## | 176 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ## |
177 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 %mask) | 177 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask) |
178 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 | 178 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 |
179 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ## | 179 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ## |
180 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 %mask) | 180 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask) |
181 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 | 181 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 |
182 ret <8 x i64> %vec7 | 182 ret <8 x i64> %vec7 |
183 } | 183 } |
184 | 184 |
185 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i8, i64) nounwind readnone | 185 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone |
186 | 186 |
187 define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { | 187 define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { |
188 ; CHECK_LABEL: test_cmp_w_512 | 188 ; CHECK_LABEL: test_cmp_w_512 |
189 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ## | 189 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ## |
190 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 -1) | 190 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) |
191 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 | 191 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 |
192 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ## | 192 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ## |
193 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 -1) | 193 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) |
194 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 | 194 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 |
195 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 ## | 195 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 ## |
196 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 -1) | 196 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1) |
197 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 | 197 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 |
198 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ## | 198 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ## |
199 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 -1) | 199 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1) |
200 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 | 200 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 |
201 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ## | 201 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ## |
202 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 -1) | 202 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1) |
203 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 | 203 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 |
204 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ## | 204 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ## |
205 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 -1) | 205 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1) |
206 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 | 206 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 |
207 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ## | 207 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ## |
208 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 -1) | 208 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1) |
209 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 | 209 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 |
210 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ## | 210 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ## |
211 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 -1) | 211 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1) |
212 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 | 212 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 |
213 ret <8 x i32> %vec7 | 213 ret <8 x i32> %vec7 |
214 } | 214 } |
215 | 215 |
216 define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { | 216 define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { |
217 ; CHECK_LABEL: test_mask_cmp_w_512 | 217 ; CHECK_LABEL: test_mask_cmp_w_512 |
218 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ## | 218 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ## |
219 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 %mask) | 219 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) |
220 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 | 220 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 |
221 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ## | 221 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ## |
222 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 %mask) | 222 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) |
223 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 | 223 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 |
224 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ## | 224 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ## |
225 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 %mask) | 225 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask) |
226 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 | 226 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 |
227 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ## | 227 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ## |
228 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 %mask) | 228 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask) |
229 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 | 229 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 |
230 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ## | 230 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ## |
231 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 %mask) | 231 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask) |
232 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 | 232 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 |
233 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ## | 233 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ## |
234 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 %mask) | 234 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask) |
235 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 | 235 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 |
236 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ## | 236 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ## |
237 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 %mask) | 237 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask) |
238 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 | 238 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 |
239 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ## | 239 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ## |
240 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 %mask) | 240 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask) |
241 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 | 241 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 |
242 ret <8 x i32> %vec7 | 242 ret <8 x i32> %vec7 |
243 } | 243 } |
244 | 244 |
245 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i8, i32) nounwind readnone | 245 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone |
246 | 246 |
247 define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { | 247 define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { |
248 ; CHECK_LABEL: test_ucmp_w_512 | 248 ; CHECK_LABEL: test_ucmp_w_512 |
249 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ## | 249 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ## |
250 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 -1) | 250 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1) |
251 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 | 251 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 |
252 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ## | 252 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ## |
253 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 -1) | 253 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1) |
254 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 | 254 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 |
255 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ## | 255 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ## |
256 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 -1) | 256 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1) |
257 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 | 257 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 |
258 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ## | 258 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ## |
259 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 -1) | 259 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1) |
260 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 | 260 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 |
261 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ## | 261 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ## |
262 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 -1) | 262 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1) |
263 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 | 263 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 |
264 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ## | 264 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ## |
265 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 -1) | 265 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1) |
266 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 | 266 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 |
267 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ## | 267 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ## |
268 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 -1) | 268 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1) |
269 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 | 269 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 |
270 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ## | 270 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ## |
271 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 -1) | 271 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1) |
272 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 | 272 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 |
273 ret <8 x i32> %vec7 | 273 ret <8 x i32> %vec7 |
274 } | 274 } |
275 | 275 |
276 define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { | 276 define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { |
277 ; CHECK_LABEL: test_mask_ucmp_w_512 | 277 ; CHECK_LABEL: test_mask_ucmp_w_512 |
278 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ## | 278 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ## |
279 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 %mask) | 279 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask) |
280 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 | 280 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 |
281 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ## | 281 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ## |
282 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 %mask) | 282 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask) |
283 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 | 283 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 |
284 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ## | 284 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ## |
285 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 %mask) | 285 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask) |
286 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 | 286 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 |
287 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ## | 287 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ## |
288 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 %mask) | 288 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask) |
289 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 | 289 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 |
290 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ## | 290 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ## |
291 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 %mask) | 291 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask) |
292 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 | 292 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 |
293 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ## | 293 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ## |
294 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 %mask) | 294 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask) |
295 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 | 295 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 |
296 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ## | 296 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ## |
297 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 %mask) | 297 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask) |
298 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 | 298 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 |
299 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ## | 299 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ## |
300 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 %mask) | 300 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask) |
301 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 | 301 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 |
302 ret <8 x i32> %vec7 | 302 ret <8 x i32> %vec7 |
303 } | 303 } |
304 | 304 |
305 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i8, i32) nounwind readnone | 305 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone |
306 | 306 |
307 ; CHECK-LABEL: test_x86_mask_blend_b_256 | 307 declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly |
308 ; CHECK: vpblendmb | 308 |
309 define <32 x i8> @test_x86_mask_blend_b_256(i32 %a0, <32 x i8> %a1, <32 x i8> %a2) { | 309 ; CHECK-LABEL: test_x86_mask_blend_w_512 |
310 %res = call <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8> %a1, <32 x i8> %a2, i32 %a0) ; <<32 x i8>> [#uses=1] | 310 define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) { |
311 ret <32 x i8> %res | |
312 } | |
313 declare <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8>, <32 x i8>, i32) nounwind readonly | |
314 | |
315 ; CHECK-LABEL: test_x86_mask_blend_w_256 | |
316 define <16 x i16> @test_x86_mask_blend_w_256(i16 %mask, <16 x i16> %a1, <16 x i16> %a2) { | |
317 ; CHECK: vpblendmw | 311 ; CHECK: vpblendmw |
318 %res = call <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16> %a1, <16 x i16> %a2, i16 %mask) ; <<16 x i16>> [#uses=1] | 312 %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1] |
319 ret <16 x i16> %res | 313 ret <32 x i16> %res |
320 } | 314 } |
321 declare <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16>, <16 x i16>, i16) nounwind readonly | 315 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly |
322 | 316 |
323 ; CHECK-LABEL: test_x86_mask_blend_b_512 | 317 ; CHECK-LABEL: test_x86_mask_blend_b_512 |
324 ; CHECK: vpblendmb | 318 ; CHECK: vpblendmb |
325 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) { | 319 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) { |
326 %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1] | 320 %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1] |
327 ret <64 x i8> %res | 321 ret <64 x i8> %res |
328 } | 322 } |
329 declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly | 323 |
330 | 324 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { |
331 ; CHECK-LABEL: test_x86_mask_blend_w_512 | 325 ;CHECK-LABEL: test_mask_packs_epi32_rr_512 |
332 define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) { | 326 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1] |
333 ; CHECK: vpblendmw | 327 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) |
334 %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1] | 328 ret <32 x i16> %res |
335 ret <32 x i16> %res | 329 } |
336 } | 330 |
337 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly | 331 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { |
338 | 332 ;CHECK-LABEL: test_mask_packs_epi32_rrk_512 |
339 ; CHECK-LABEL: test_x86_mask_blend_b_128 | 333 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1] |
340 ; CHECK: vpblendmb | 334 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) |
341 define <16 x i8> @test_x86_mask_blend_b_128(i16 %a0, <16 x i8> %a1, <16 x i8> %a2) { | 335 ret <32 x i16> %res |
342 %res = call <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8> %a1, <16 x i8> %a2, i16 %a0) ; <<16 x i8>> [#uses=1] | 336 } |
343 ret <16 x i8> %res | 337 |
344 } | 338 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { |
345 declare <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8>, <16 x i8>, i16) nounwind readonly | 339 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_512 |
346 | 340 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1] |
347 ; CHECK-LABEL: test_x86_mask_blend_w_128 | 341 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) |
348 define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %a2) { | 342 ret <32 x i16> %res |
349 ; CHECK: vpblendmw | 343 } |
350 %res = call <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16> %a1, <8 x i16> %a2, i8 %mask) ; <<8 x i16>> [#uses=1] | 344 |
351 ret <8 x i16> %res | 345 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { |
352 } | 346 ;CHECK-LABEL: test_mask_packs_epi32_rm_512 |
353 declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly | 347 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07] |
348 %b = load <16 x i32>, <16 x i32>* %ptr_b | |
349 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) | |
350 ret <32 x i16> %res | |
351 } | |
352 | |
353 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { | |
354 ;CHECK-LABEL: test_mask_packs_epi32_rmk_512 | |
355 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f] | |
356 %b = load <16 x i32>, <16 x i32>* %ptr_b | |
357 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) | |
358 ret <32 x i16> %res | |
359 } | |
360 | |
361 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { | |
362 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_512 | |
363 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07] | |
364 %b = load <16 x i32>, <16 x i32>* %ptr_b | |
365 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) | |
366 ret <32 x i16> %res | |
367 } | |
368 | |
369 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { | |
370 ;CHECK-LABEL: test_mask_packs_epi32_rmb_512 | |
371 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07] | |
372 %q = load i32, i32* %ptr_b | |
373 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 | |
374 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer | |
375 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) | |
376 ret <32 x i16> %res | |
377 } | |
378 | |
379 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { | |
380 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_512 | |
381 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f] | |
382 %q = load i32, i32* %ptr_b | |
383 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 | |
384 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer | |
385 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) | |
386 ret <32 x i16> %res | |
387 } | |
388 | |
389 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { | |
390 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_512 | |
391 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07] | |
392 %q = load i32, i32* %ptr_b | |
393 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 | |
394 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer | |
395 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) | |
396 ret <32 x i16> %res | |
397 } | |
398 | |
399 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) | |
400 | |
401 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { | |
402 ;CHECK-LABEL: test_mask_packs_epi16_rr_512 | |
403 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0xc1] | |
404 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) | |
405 ret <64 x i8> %res | |
406 } | |
407 | |
408 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { | |
409 ;CHECK-LABEL: test_mask_packs_epi16_rrk_512 | |
410 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0xd1] | |
411 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) | |
412 ret <64 x i8> %res | |
413 } | |
414 | |
415 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { | |
416 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_512 | |
417 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0xc1] | |
418 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) | |
419 ret <64 x i8> %res | |
420 } | |
421 | |
422 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { | |
423 ;CHECK-LABEL: test_mask_packs_epi16_rm_512 | |
424 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0x07] | |
425 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
426 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) | |
427 ret <64 x i8> %res | |
428 } | |
429 | |
430 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { | |
431 ;CHECK-LABEL: test_mask_packs_epi16_rmk_512 | |
432 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0x0f] | |
433 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
434 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) | |
435 ret <64 x i8> %res | |
436 } | |
437 | |
438 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { | |
439 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_512 | |
440 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0x07] | |
441 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
442 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) | |
443 ret <64 x i8> %res | |
444 } | |
445 | |
446 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) | |
447 | |
448 | |
449 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) { | |
450 ;CHECK-LABEL: test_mask_packus_epi32_rr_512 | |
451 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 | |
452 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) | |
453 ret <32 x i16> %res | |
454 } | |
455 | |
456 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) { | |
457 ;CHECK-LABEL: test_mask_packus_epi32_rrk_512 | |
458 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm2 {%k1} | |
459 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) | |
460 ret <32 x i16> %res | |
461 } | |
462 | |
463 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) { | |
464 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_512 | |
465 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} | |
466 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) | |
467 ret <32 x i16> %res | |
468 } | |
469 | |
470 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) { | |
471 ;CHECK-LABEL: test_mask_packus_epi32_rm_512 | |
472 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 | |
473 %b = load <16 x i32>, <16 x i32>* %ptr_b | |
474 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) | |
475 ret <32 x i16> %res | |
476 } | |
477 | |
478 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) { | |
479 ;CHECK-LABEL: test_mask_packus_epi32_rmk_512 | |
480 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm1 {%k1} | |
481 %b = load <16 x i32>, <16 x i32>* %ptr_b | |
482 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) | |
483 ret <32 x i16> %res | |
484 } | |
485 | |
486 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) { | |
487 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_512 | |
488 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} | |
489 %b = load <16 x i32>, <16 x i32>* %ptr_b | |
490 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) | |
491 ret <32 x i16> %res | |
492 } | |
493 | |
494 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) { | |
495 ;CHECK-LABEL: test_mask_packus_epi32_rmb_512 | |
496 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 | |
497 %q = load i32, i32* %ptr_b | |
498 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 | |
499 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer | |
500 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) | |
501 ret <32 x i16> %res | |
502 } | |
503 | |
504 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) { | |
505 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_512 | |
506 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} | |
507 %q = load i32, i32* %ptr_b | |
508 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 | |
509 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer | |
510 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) | |
511 ret <32 x i16> %res | |
512 } | |
513 | |
514 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) { | |
515 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_512 | |
516 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} | |
517 %q = load i32, i32* %ptr_b | |
518 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0 | |
519 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer | |
520 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask) | |
521 ret <32 x i16> %res | |
522 } | |
523 | |
524 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32) | |
525 | |
526 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { | |
527 ;CHECK-LABEL: test_mask_packus_epi16_rr_512 | |
528 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 | |
529 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) | |
530 ret <64 x i8> %res | |
531 } | |
532 | |
533 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) { | |
534 ;CHECK-LABEL: test_mask_packus_epi16_rrk_512 | |
535 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm2 {%k1} | |
536 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) | |
537 ret <64 x i8> %res | |
538 } | |
539 | |
540 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) { | |
541 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_512 | |
542 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} | |
543 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) | |
544 ret <64 x i8> %res | |
545 } | |
546 | |
547 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { | |
548 ;CHECK-LABEL: test_mask_packus_epi16_rm_512 | |
549 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 | |
550 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
551 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) | |
552 ret <64 x i8> %res | |
553 } | |
554 | |
555 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) { | |
556 ;CHECK-LABEL: test_mask_packus_epi16_rmk_512 | |
557 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm1 {%k1} | |
558 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
559 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) | |
560 ret <64 x i8> %res | |
561 } | |
562 | |
563 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) { | |
564 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_512 | |
565 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} | |
566 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
567 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask) | |
568 ret <64 x i8> %res | |
569 } | |
570 | |
571 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64) | |
572 | |
573 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { | |
574 ;CHECK-LABEL: test_mask_adds_epi16_rr_512 | |
575 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 | |
576 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) | |
577 ret <32 x i16> %res | |
578 } | |
579 | |
580 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { | |
581 ;CHECK-LABEL: test_mask_adds_epi16_rrk_512 | |
582 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} | |
583 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) | |
584 ret <32 x i16> %res | |
585 } | |
586 | |
587 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { | |
588 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_512 | |
589 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} | |
590 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) | |
591 ret <32 x i16> %res | |
592 } | |
593 | |
594 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { | |
595 ;CHECK-LABEL: test_mask_adds_epi16_rm_512 | |
596 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 | |
597 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
598 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) | |
599 ret <32 x i16> %res | |
600 } | |
601 | |
602 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { | |
603 ;CHECK-LABEL: test_mask_adds_epi16_rmk_512 | |
604 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} | |
605 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
606 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) | |
607 ret <32 x i16> %res | |
608 } | |
609 | |
610 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { | |
611 ;CHECK-LABEL: test_mask_adds_epi16_rmkz_512 | |
612 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} | |
613 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
614 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) | |
615 ret <32 x i16> %res | |
616 } | |
617 | |
618 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
619 | |
620 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) { | |
621 ;CHECK-LABEL: test_mask_subs_epi16_rr_512 | |
622 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 | |
623 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) | |
624 ret <32 x i16> %res | |
625 } | |
626 | |
627 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { | |
628 ;CHECK-LABEL: test_mask_subs_epi16_rrk_512 | |
629 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} | |
630 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) | |
631 ret <32 x i16> %res | |
632 } | |
633 | |
634 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { | |
635 ;CHECK-LABEL: test_mask_subs_epi16_rrkz_512 | |
636 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} | |
637 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) | |
638 ret <32 x i16> %res | |
639 } | |
640 | |
641 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { | |
642 ;CHECK-LABEL: test_mask_subs_epi16_rm_512 | |
643 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 | |
644 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
645 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) | |
646 ret <32 x i16> %res | |
647 } | |
648 | |
649 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { | |
650 ;CHECK-LABEL: test_mask_subs_epi16_rmk_512 | |
651 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} | |
652 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
653 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) | |
654 ret <32 x i16> %res | |
655 } | |
656 | |
657 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { | |
658 ;CHECK-LABEL: test_mask_subs_epi16_rmkz_512 | |
659 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} | |
660 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
661 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) | |
662 ret <32 x i16> %res | |
663 } | |
664 | |
665 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
666 | |
667 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { | |
668 ;CHECK-LABEL: test_mask_adds_epu16_rr_512 | |
669 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 | |
670 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) | |
671 ret <32 x i16> %res | |
672 } | |
673 | |
674 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { | |
675 ;CHECK-LABEL: test_mask_adds_epu16_rrk_512 | |
676 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm2 {%k1} | |
677 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) | |
678 ret <32 x i16> %res | |
679 } | |
680 | |
681 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { | |
682 ;CHECK-LABEL: test_mask_adds_epu16_rrkz_512 | |
683 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} | |
684 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) | |
685 ret <32 x i16> %res | |
686 } | |
687 | |
688 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { | |
689 ;CHECK-LABEL: test_mask_adds_epu16_rm_512 | |
690 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 | |
691 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
692 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) | |
693 ret <32 x i16> %res | |
694 } | |
695 | |
696 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { | |
697 ;CHECK-LABEL: test_mask_adds_epu16_rmk_512 | |
698 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm1 {%k1} | |
699 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
700 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) | |
701 ret <32 x i16> %res | |
702 } | |
703 | |
704 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { | |
705 ;CHECK-LABEL: test_mask_adds_epu16_rmkz_512 | |
706 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} | |
707 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
708 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) | |
709 ret <32 x i16> %res | |
710 } | |
711 | |
712 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
713 | |
714 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) { | |
715 ;CHECK-LABEL: test_mask_subs_epu16_rr_512 | |
716 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 | |
717 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) | |
718 ret <32 x i16> %res | |
719 } | |
720 | |
721 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) { | |
722 ;CHECK-LABEL: test_mask_subs_epu16_rrk_512 | |
723 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm2 {%k1} | |
724 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) | |
725 ret <32 x i16> %res | |
726 } | |
727 | |
728 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) { | |
729 ;CHECK-LABEL: test_mask_subs_epu16_rrkz_512 | |
730 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} | |
731 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) | |
732 ret <32 x i16> %res | |
733 } | |
734 | |
735 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) { | |
736 ;CHECK-LABEL: test_mask_subs_epu16_rm_512 | |
737 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 | |
738 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
739 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1) | |
740 ret <32 x i16> %res | |
741 } | |
742 | |
743 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) { | |
744 ;CHECK-LABEL: test_mask_subs_epu16_rmk_512 | |
745 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm1 {%k1} | |
746 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
747 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) | |
748 ret <32 x i16> %res | |
749 } | |
750 | |
751 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) { | |
752 ;CHECK-LABEL: test_mask_subs_epu16_rmkz_512 | |
753 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} | |
754 %b = load <32 x i16>, <32 x i16>* %ptr_b | |
755 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask) | |
756 ret <32 x i16> %res | |
757 } | |
758 | |
759 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
760 | |
761 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) | |
762 | |
763 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_512 | |
764 ; CHECK-NOT: call | |
765 ; CHECK: vpmaxsb %zmm | |
766 ; CHECK: {%k1} | |
767 define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { | |
768 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) | |
769 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) | |
770 %res2 = add <64 x i8> %res, %res1 | |
771 ret <64 x i8> %res2 | |
772 } | |
773 | |
774 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
775 | |
776 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_512 | |
777 ; CHECK-NOT: call | |
778 ; CHECK: vpmaxsw %zmm | |
779 ; CHECK: {%k1} | |
780 define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
781 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
782 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
783 %res2 = add <32 x i16> %res, %res1 | |
784 ret <32 x i16> %res2 | |
785 } | |
786 | |
787 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) | |
788 | |
789 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_512 | |
790 ; CHECK-NOT: call | |
791 ; CHECK: vpmaxub %zmm | |
792 ; CHECK: {%k1} | |
793 define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { | |
794 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) | |
795 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) | |
796 %res2 = add <64 x i8> %res, %res1 | |
797 ret <64 x i8> %res2 | |
798 } | |
799 | |
; Masked byte/word min/max intrinsic tests (pmaxu.w, pmins.b/w, pminu.b/w).
; Each define calls the 512-bit masked intrinsic twice -- once with the live
; mask argument (%x3) and once with an all-ones mask (i32/i64 -1) -- then
; adds the two results so neither call is dead.  The CHECK lines verify the
; expected instruction is emitted and that the masked form uses {%k1}.
800 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
801 | |
802 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_512 | |
803 ; CHECK-NOT: call | |
804 ; CHECK: vpmaxuw %zmm | |
805 ; CHECK: {%k1} | |
806 define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
807 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
808 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
809 %res2 = add <32 x i16> %res, %res1 | |
810 ret <32 x i16> %res2 | |
811 } | |
812 | |
813 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) | |
814 | |
815 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_512 | |
816 ; CHECK-NOT: call | |
817 ; CHECK: vpminsb %zmm | |
818 ; CHECK: {%k1} | |
819 define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { | |
820 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) | |
821 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) | |
822 %res2 = add <64 x i8> %res, %res1 | |
823 ret <64 x i8> %res2 | |
824 } | |
825 | |
826 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
827 | |
828 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_512 | |
829 ; CHECK-NOT: call | |
830 ; CHECK: vpminsw %zmm | |
831 ; CHECK: {%k1} | |
832 define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
833 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
834 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
835 %res2 = add <32 x i16> %res, %res1 | |
836 ret <32 x i16> %res2 | |
837 } | |
838 | |
839 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) | |
840 | |
841 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_512 | |
842 ; CHECK-NOT: call | |
843 ; CHECK: vpminub %zmm | |
844 ; CHECK: {%k1} | |
845 define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { | |
846 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) | |
847 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) | |
848 %res2 = add <64 x i8> %res, %res1 | |
849 ret <64 x i8> %res2 | |
850 } | |
851 | |
852 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
853 | |
854 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_512 | |
855 ; CHECK-NOT: call | |
856 ; CHECK: vpminuw %zmm | |
857 ; CHECK: {%k1} | |
858 define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
859 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
860 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
861 %res2 = add <32 x i16> %res, %res1 | |
862 ret <32 x i16> %res2 | |
863 } | |
864 | |
; Two-source word permute tests: vpermt2var (merge-masked and zero-masked
; variants) and vpermi2var.  Each test pairs a masked call (%x3) with an
; unmasked call (-1) and sums the results; the CHECK lines expect a kmov of
; the mask followed by vpermt2w/vpermi2w with {%k1} (and {z} for the maskz
; form).
865 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
866 | |
867 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512 | |
868 ; CHECK-NOT: call | |
869 ; CHECK: kmov | |
870 ; CHECK: vpermt2w %zmm{{.*}}{%k1} | |
871 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
872 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
873 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
874 %res2 = add <32 x i16> %res, %res1 | |
875 ret <32 x i16> %res2 | |
876 } | |
877 | |
878 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
879 | |
880 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512 | |
881 ; CHECK-NOT: call | |
882 ; CHECK: kmov | |
883 ; CHECK: vpermt2w %zmm{{.*}}{%k1} {z} | |
884 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
885 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
886 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
887 %res2 = add <32 x i16> %res, %res1 | |
888 ret <32 x i16> %res2 | |
889 } | |
890 | |
891 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
892 | |
893 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512 | |
894 ; CHECK-NOT: call | |
895 ; CHECK: kmov | |
896 ; CHECK: vpermi2w %zmm{{.*}}{%k1} | |
897 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
898 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
899 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
900 %res2 = add <32 x i16> %res, %res1 | |
901 ret <32 x i16> %res2 | |
902 } | |
903 | |
; Averaging (vpavgb/vpavgw) and byte-shuffle (vpshufb) tests.  Same masked
; (%x3) vs. all-ones-mask (-1) call pair per test, with the results summed
; so both calls survive; CHECK lines assert the instruction and the {%k1}
; masking on the first call.
904 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) | |
905 | |
906 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512 | |
907 ; CHECK-NOT: call | |
908 ; CHECK: vpavgb %zmm | |
909 ; CHECK: {%k1} | |
910 define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { | |
911 %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) | |
912 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) | |
913 %res2 = add <64 x i8> %res, %res1 | |
914 ret <64 x i8> %res2 | |
915 } | |
916 | |
917 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
918 | |
919 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_512 | |
920 ; CHECK-NOT: call | |
921 ; CHECK: vpavgw %zmm | |
922 ; CHECK: {%k1} | |
923 define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
924 %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
925 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
926 %res2 = add <32 x i16> %res, %res1 | |
927 ret <32 x i16> %res2 | |
928 } | |
929 | |
930 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) | |
931 | |
932 ; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512 | |
933 ; CHECK-NOT: call | |
934 ; CHECK: kmov | |
935 ; CHECK: vpshufb %zmm{{.*}}{%k1} | |
936 define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { | |
937 %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) | |
938 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) | |
939 %res2 = add <64 x i8> %res, %res1 | |
940 ret <64 x i8> %res2 | |
941 } | |
942 | |
; Absolute-value (vpabsw/vpabsb) and high-half multiply (vpmulhuw, vpmulhw,
; vpmulhrsw) tests.  The pabs intrinsics are unary (src, passthru, mask);
; the pmulh* tests additionally check the VEX/EVEX encoding prefix via the
; "encoding: [0x62" pattern to pin down the EVEX-encoded form.
943 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32) | |
944 | |
945 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512 | |
946 ; CHECK-NOT: call | |
947 ; CHECK: kmov | |
948 ; CHECK: vpabsw{{.*}}{%k1} | |
949 define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { | |
950 %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) | |
951 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1) | |
952 %res2 = add <32 x i16> %res, %res1 | |
953 ret <32 x i16> %res2 | |
954 } | |
955 | |
956 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64) | |
957 | |
958 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512 | |
959 ; CHECK-NOT: call | |
960 ; CHECK: kmov | |
961 ; CHECK: vpabsb{{.*}}{%k1} | |
962 define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { | |
963 %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) | |
964 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1) | |
965 %res2 = add <64 x i8> %res, %res1 | |
966 ret <64 x i8> %res2 | |
967 } | |
968 | |
969 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
970 | |
971 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_512 | |
972 ; CHECK-NOT: call | |
973 ; CHECK: kmov | |
974 ; CHECK: {%k1} | |
975 ; CHECK: vpmulhuw {{.*}}encoding: [0x62 | |
976 define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
977 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
978 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
979 %res2 = add <32 x i16> %res, %res1 | |
980 ret <32 x i16> %res2 | |
981 } | |
982 | |
983 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
984 | |
985 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_512 | |
986 ; CHECK-NOT: call | |
987 ; CHECK: kmov | |
988 ; CHECK: {%k1} | |
989 ; CHECK: vpmulhw {{.*}}encoding: [0x62 | |
990 define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
991 %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
992 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
993 %res2 = add <32 x i16> %res, %res1 | |
994 ret <32 x i16> %res2 | |
995 } | |
996 | |
997 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
998 | |
999 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_512 | |
1000 ; CHECK-NOT: call | |
1001 ; CHECK: kmov | |
1002 ; CHECK: {%k1} | |
1003 ; CHECK: vpmulhrsw {{.*}}encoding: [0x62 | |
1004 define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
1005 %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
1006 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
1007 %res2 = add <32 x i16> %res, %res1 | |
1008 ret <32 x i16> %res2 | |
1009 } | |
1010 | |
; Word-to-byte truncation tests: vpmovwb (truncate), vpmovswb (signed
; saturate), vpmovuswb (unsigned saturate).  Register-form tests exercise
; three variants per intrinsic -- unmasked (-1), merge-masked into %x1, and
; zero-masked (zeroinitializer passthru, expecting {%k1} {z}).  Memory-form
; tests store to (%rdi) unmasked and then masked.
1011 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32) | |
1012 | |
1013 define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { | |
1014 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_512: | |
1015 ; CHECK: vpmovwb %zmm0, %ymm1 {%k1} | |
1016 ; CHECK-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z} | |
1017 ; CHECK-NEXT: vpmovwb %zmm0, %ymm0 | |
1018 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) | |
1019 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) | |
1020 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) | |
1021 %res3 = add <32 x i8> %res0, %res1 | |
1022 %res4 = add <32 x i8> %res3, %res2 | |
1023 ret <32 x i8> %res4 | |
1024 } | |
1025 | |
1026 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32) | |
1027 | |
1028 define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { | |
1029 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512: | |
1030 ; CHECK: vpmovwb %zmm0, (%rdi) | |
1031 ; CHECK: vpmovwb %zmm0, (%rdi) {%k1} | |
1032 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) | |
1033 call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) | |
1034 ret void | |
1035 } | |
1036 | |
1037 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32) | |
1038 | |
1039 define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { | |
1040 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_512: | |
1041 ; CHECK: vpmovswb %zmm0, %ymm1 {%k1} | |
1042 ; CHECK-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z} | |
1043 ; CHECK-NEXT: vpmovswb %zmm0, %ymm0 | |
1044 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) | |
1045 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) | |
1046 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) | |
1047 %res3 = add <32 x i8> %res0, %res1 | |
1048 %res4 = add <32 x i8> %res3, %res2 | |
1049 ret <32 x i8> %res4 | |
1050 } | |
1051 | |
1052 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32) | |
1053 | |
1054 define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { | |
1055 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512: | |
1056 ; CHECK: vpmovswb %zmm0, (%rdi) | |
1057 ; CHECK: vpmovswb %zmm0, (%rdi) {%k1} | |
1058 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) | |
1059 call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) | |
1060 ret void | |
1061 } | |
1062 | |
1063 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32) | |
1064 | |
1065 define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) { | |
1066 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_512: | |
1067 ; CHECK: vpmovuswb %zmm0, %ymm1 {%k1} | |
1068 ; CHECK-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z} | |
1069 ; CHECK-NEXT: vpmovuswb %zmm0, %ymm0 | |
1070 %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) | |
1071 %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) | |
1072 %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) | |
1073 %res3 = add <32 x i8> %res0, %res1 | |
1074 %res4 = add <32 x i8> %res3, %res2 | |
1075 ret <32 x i8> %res4 | |
1076 } | |
1077 | |
1078 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32) | |
1079 | |
1080 define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) { | |
1081 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512: | |
1082 ; CHECK: vpmovuswb %zmm0, (%rdi) | |
1083 ; CHECK: vpmovuswb %zmm0, (%rdi) {%k1} | |
1084 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1) | |
1085 call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2) | |
1086 ret void | |
1087 } | |
1088 | |
; Multiply-add tests: vpmaddubsw (i8 pairs -> i16 lanes, i32 mask for 32
; result lanes) and vpmaddwd (i16 pairs -> i32 lanes, i16 mask for 16 result
; lanes).  These use full generated-code CHECK sequences: kmov the mask,
; masked op into zmm2, unmasked op into zmm0, then a vector add combining
; both results.
1089 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32) | |
1090 | |
1091 define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) { | |
1092 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512: | |
1093 ; CHECK: ## BB#0: | |
1094 ; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] | |
1095 ; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} | |
1096 ; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0 | |
1097 ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 | |
1098 ; CHECK-NEXT: retq | |
1099 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) | |
1100 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1) | |
1101 %res2 = add <32 x i16> %res, %res1 | |
1102 ret <32 x i16> %res2 | |
1103 } | |
1104 | |
1105 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16) | |
1106 | |
1107 define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) { | |
1108 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_512: | |
1109 ; CHECK: ## BB#0: | |
1110 ; CHECK-NEXT: kmovw %edi, %k1 | |
1111 ; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} | |
1112 ; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 | |
1113 ; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 | |
1114 ; CHECK-NEXT: retq | |
1115 %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) | |
1116 %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1) | |
1117 %res2 = add <16 x i32> %res, %res1 | |
1118 ret <16 x i32> %res2 | |
1119 } | |
1120 | |
; Interleave (unpack) tests: vpunpckhbw/vpunpcklbw on bytes (i64 mask) and
; vpunpckhwd/vpunpcklwd on words (i32 mask).  Each test checks the full
; sequence: kmov of the mask, masked unpack into zmm2, unmasked unpack into
; zmm0, and a vector add combining the two results.
1121 declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) | |
1122 | |
1123 define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { | |
1124 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_512: | |
1125 ; CHECK: ## BB#0: | |
1126 ; CHECK-NEXT: kmovq %rdi, %k1 | |
1127 ; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} | |
1128 ; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm0 | |
1129 ; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 | |
1130 ; CHECK-NEXT: retq | |
1131 %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) | |
1132 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) | |
1133 %res2 = add <64 x i8> %res, %res1 | |
1134 ret <64 x i8> %res2 | |
1135 } | |
1136 | |
1137 declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) | |
1138 | |
1139 define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { | |
1140 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_512: | |
1141 ; CHECK: ## BB#0: | |
1142 ; CHECK-NEXT: kmovq %rdi, %k1 | |
1143 ; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} | |
1144 ; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm0 | |
1145 ; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 | |
1146 ; CHECK-NEXT: retq | |
1147 %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) | |
1148 %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) | |
1149 %res2 = add <64 x i8> %res, %res1 | |
1150 ret <64 x i8> %res2 | |
1151 } | |
1152 | |
1153 declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
1154 | |
1155 define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
1156 ; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_512: | |
1157 ; CHECK: ## BB#0: | |
1158 ; CHECK-NEXT: kmovd %edi, %k1 | |
1159 ; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} | |
1160 ; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm0 | |
1161 ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 | |
1162 ; CHECK-NEXT: retq | |
1163 %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
1164 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
1165 %res2 = add <32 x i16> %res, %res1 | |
1166 ret <32 x i16> %res2 | |
1167 } | |
1168 | |
1169 declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) | |
1170 | |
1171 define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { | |
1172 ; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_512: | |
1173 ; CHECK: ## BB#0: | |
1174 ; CHECK-NEXT: kmovd %edi, %k1 | |
1175 ; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} | |
1176 ; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm0 | |
1177 ; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 | |
1178 ; CHECK-NEXT: retq | |
1179 %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) | |
1180 %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) | |
1181 %res2 = add <32 x i16> %res, %res1 | |
1182 ret <32 x i16> %res2 | |
1183 } | |
1184 | |
; Immediate-operand tests: vpalignr (byte rotate across lanes, imm = 2) and
; vdbpsadbw (double-block packed SAD, imm = 2).  Each exercises three call
; variants with the same immediate -- merge-masked, zero-masked
; (zeroinitializer passthru, {%k1} {z}), and unmasked (-1) -- and chains
; two adds so all three results are live.
1185 declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64) | |
1186 | |
1187 define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) { | |
1188 ; CHECK-LABEL: test_int_x86_avx512_mask_palignr_512: | |
1189 ; CHECK: ## BB#0: | |
1190 ; CHECK-NEXT: kmovq %rdi, %k1 | |
1191 ; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} | |
1192 ; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z} | |
1193 ; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0 | |
1194 ; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1 | |
1195 ; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0 | |
1196 ; CHECK-NEXT: retq | |
1197 %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4) | |
1198 %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4) | |
1199 %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1) | |
1200 %res3 = add <64 x i8> %res, %res1 | |
1201 %res4 = add <64 x i8> %res3, %res2 | |
1202 ret <64 x i8> %res4 | |
1203 } | |
1204 | |
1205 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32) | |
1206 | |
1207 define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { | |
1208 ; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: | |
1209 ; CHECK: ## BB#0: | |
1210 ; CHECK-NEXT: kmovd %edi, %k1 | |
1211 ; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} | |
1212 ; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z} | |
1213 ; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0 | |
1214 ; CHECK-NEXT: vpaddw %zmm3, %zmm2, %zmm1 | |
1215 ; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0 | |
1216 ; CHECK-NEXT: retq | |
1217 %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4) | |
1218 %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4) | |
1219 %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1) | |
1220 %res3 = add <32 x i16> %res, %res1 | |
1221 %res4 = add <32 x i16> %res3, %res2 | |
1222 ret <32 x i16> %res4 | |
1223 } | |
1224 | |
; Unmasked whole-register byte shifts (vpslldq/vpsrldq, imm counts 8 and 4)
; and psadbw.  Each test issues two calls with different operands/immediates
; and adds the results so both instructions must be emitted.
; NOTE(review): psad.bw.512 is declared here returning <64 x i8>, but the
; VPSADBW instruction accumulates absolute differences into 64-bit sums --
; confirm this signature matches the intrinsic definition at this LLVM
; revision (later trees declare it as <8 x i64>).
1225 declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32) | |
1226 | |
1227 ; CHECK-LABEL: @test_int_x86_avx512_mask_psll_dq_512 | |
1228 ; CHECK-NOT: call | |
1229 ; CHECK: vpslldq | |
1230 ; CHECK: vpslldq | |
1231 define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) { | |
1232 %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8) | |
1233 %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4) | |
1234 %res2 = add <8 x i64> %res, %res1 | |
1235 ret <8 x i64> %res2 | |
1236 } | |
1237 | |
1238 declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32) | |
1239 | |
1240 ; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_dq_512 | |
1241 ; CHECK-NOT: call | |
1242 ; CHECK: vpsrldq | |
1243 ; CHECK: vpsrldq | |
1244 define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) { | |
1245 %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8) | |
1246 %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4) | |
1247 %res2 = add <8 x i64> %res, %res1 | |
1248 ret <8 x i64> %res2 | |
1249 } | |
1250 declare <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>) | |
1251 | |
1252 ; CHECK-LABEL: @test_int_x86_avx512_mask_psadb_w_512 | |
1253 ; CHECK-NOT: call | |
1254 ; CHECK: vpsadbw %zmm1 | |
1255 ; CHECK: vpsadbw %zmm2 | |
1256 define <64 x i8>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){ | |
1257 %res = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) | |
1258 %res1 = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2) | |
1259 %res2 = add <64 x i8> %res, %res1 | |
1260 ret <64 x i8> %res2 | |
1261 } | |