comparison test/CodeGen/X86/avx512bw-intrinsics.ll @ 95:afa8332a0e37 LLVM3.8

LLVM 3.8
author Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp>
date Tue, 13 Oct 2015 17:48:58 +0900
parents 60c9769439b8
children 7d135dc70f03
comparison
equal deleted inserted replaced
84:f3e34b893a5f 95:afa8332a0e37
1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding| FileCheck %s 1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw --show-mc-encoding| FileCheck %s
2 2
3 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) { 3 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
4 ; CHECK-LABEL: test_pcmpeq_b 4 ; CHECK-LABEL: test_pcmpeq_b
5 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ## 5 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
6 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1) 6 %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
65 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32) 65 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
66 66
67 define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { 67 define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
68 ; CHECK_LABEL: test_cmp_b_512 68 ; CHECK_LABEL: test_cmp_b_512
69 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ## 69 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
70 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 -1) 70 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
71 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 71 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
72 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ## 72 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ##
73 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 -1) 73 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
74 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 74 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
75 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ## 75 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ##
76 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 -1) 76 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
77 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 77 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
78 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ## 78 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ##
79 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 -1) 79 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
80 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 80 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
81 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ## 81 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ##
82 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 -1) 82 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
83 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 83 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
84 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ## 84 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ##
85 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 -1) 85 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
86 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 86 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
87 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ## 87 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ##
88 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 -1) 88 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
89 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 89 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
90 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ## 90 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ##
91 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 -1) 91 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
92 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 92 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
93 ret <8 x i64> %vec7 93 ret <8 x i64> %vec7
94 } 94 }
95 95
96 define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { 96 define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
97 ; CHECK_LABEL: test_mask_cmp_b_512 97 ; CHECK_LABEL: test_mask_cmp_b_512
98 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ## 98 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
99 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 %mask) 99 %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
100 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 100 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
101 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ## 101 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ##
102 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 %mask) 102 %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
103 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 103 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
104 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ## 104 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ##
105 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 %mask) 105 %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
106 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 106 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
107 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ## 107 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ##
108 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 %mask) 108 %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
109 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 109 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
110 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ## 110 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ##
111 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 %mask) 111 %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
112 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 112 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
113 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ## 113 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ##
114 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 %mask) 114 %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
115 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 115 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
116 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ## 116 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ##
117 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 %mask) 117 %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
118 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 118 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
119 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ## 119 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ##
120 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 %mask) 120 %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
121 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 121 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
122 ret <8 x i64> %vec7 122 ret <8 x i64> %vec7
123 } 123 }
124 124
125 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i8, i64) nounwind readnone 125 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
126 126
127 define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) { 127 define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
128 ; CHECK_LABEL: test_ucmp_b_512 128 ; CHECK_LABEL: test_ucmp_b_512
129 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ## 129 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ##
130 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 -1) 130 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
131 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 131 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
132 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ## 132 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ##
133 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 -1) 133 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
134 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 134 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
135 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ## 135 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ##
136 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 -1) 136 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
137 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 137 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
138 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ## 138 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ##
139 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 -1) 139 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
140 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 140 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
141 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ## 141 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ##
142 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 -1) 142 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
143 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 143 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
144 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ## 144 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ##
145 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 -1) 145 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
146 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 146 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
147 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ## 147 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ##
148 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 -1) 148 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
149 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 149 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
150 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ## 150 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ##
151 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 -1) 151 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
152 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 152 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
153 ret <8 x i64> %vec7 153 ret <8 x i64> %vec7
154 } 154 }
155 155
156 define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { 156 define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
157 ; CHECK_LABEL: test_mask_ucmp_b_512 157 ; CHECK_LABEL: test_mask_ucmp_b_512
158 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ## 158 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ##
159 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 %mask) 159 %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
160 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0 160 %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
161 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ## 161 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ##
162 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 %mask) 162 %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
163 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1 163 %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
164 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ## 164 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ##
165 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 %mask) 165 %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
166 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2 166 %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
167 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ## 167 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ##
168 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 %mask) 168 %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
169 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3 169 %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
170 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ## 170 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ##
171 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 %mask) 171 %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
172 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4 172 %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
173 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ## 173 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ##
174 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 %mask) 174 %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
175 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5 175 %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
176 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ## 176 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ##
177 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 %mask) 177 %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
178 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6 178 %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
179 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ## 179 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ##
180 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 %mask) 180 %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
181 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7 181 %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
182 ret <8 x i64> %vec7 182 ret <8 x i64> %vec7
183 } 183 }
184 184
185 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i8, i64) nounwind readnone 185 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
186 186
187 define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { 187 define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
188 ; CHECK_LABEL: test_cmp_w_512 188 ; CHECK_LABEL: test_cmp_w_512
189 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ## 189 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
190 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 -1) 190 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
191 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 191 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
192 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ## 192 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ##
193 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 -1) 193 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
194 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 194 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
195 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 ## 195 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 ##
196 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 -1) 196 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
197 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 197 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
198 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ## 198 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ##
199 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 -1) 199 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
200 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 200 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
201 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ## 201 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ##
202 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 -1) 202 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
203 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 203 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
204 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ## 204 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ##
205 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 -1) 205 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
206 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 206 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
207 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ## 207 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ##
208 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 -1) 208 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
209 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 209 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
210 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ## 210 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ##
211 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 -1) 211 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
212 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 212 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
213 ret <8 x i32> %vec7 213 ret <8 x i32> %vec7
214 } 214 }
215 215
216 define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { 216 define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
217 ; CHECK_LABEL: test_mask_cmp_w_512 217 ; CHECK_LABEL: test_mask_cmp_w_512
218 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ## 218 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
219 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 %mask) 219 %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
220 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 220 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
221 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ## 221 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ##
222 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 %mask) 222 %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
223 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 223 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
224 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ## 224 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ##
225 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 %mask) 225 %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
226 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 226 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
227 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ## 227 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ##
228 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 %mask) 228 %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
229 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 229 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
230 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ## 230 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ##
231 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 %mask) 231 %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
232 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 232 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
233 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ## 233 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ##
234 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 %mask) 234 %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
235 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 235 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
236 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ## 236 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ##
237 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 %mask) 237 %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
238 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 238 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
239 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ## 239 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ##
240 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 %mask) 240 %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
241 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 241 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
242 ret <8 x i32> %vec7 242 ret <8 x i32> %vec7
243 } 243 }
244 244
245 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i8, i32) nounwind readnone 245 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
246 246
247 define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) { 247 define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
248 ; CHECK_LABEL: test_ucmp_w_512 248 ; CHECK_LABEL: test_ucmp_w_512
249 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ## 249 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ##
250 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 -1) 250 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
251 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 251 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
252 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ## 252 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ##
253 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 -1) 253 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
254 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 254 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
255 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ## 255 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ##
256 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 -1) 256 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
257 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 257 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
258 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ## 258 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ##
259 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 -1) 259 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
260 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 260 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
261 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ## 261 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ##
262 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 -1) 262 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
263 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 263 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
264 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ## 264 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ##
265 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 -1) 265 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
266 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 266 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
267 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ## 267 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ##
268 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 -1) 268 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
269 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 269 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
270 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ## 270 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ##
271 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 -1) 271 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
272 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 272 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
273 ret <8 x i32> %vec7 273 ret <8 x i32> %vec7
274 } 274 }
275 275
276 define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) { 276 define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
277 ; CHECK_LABEL: test_mask_ucmp_w_512 277 ; CHECK_LABEL: test_mask_ucmp_w_512
278 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ## 278 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ##
279 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 %mask) 279 %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
280 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 280 %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
281 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ## 281 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ##
282 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 %mask) 282 %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
283 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1 283 %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
284 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ## 284 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ##
285 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 %mask) 285 %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
286 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2 286 %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
287 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ## 287 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ##
288 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 %mask) 288 %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
289 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3 289 %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
290 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ## 290 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ##
291 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 %mask) 291 %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
292 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4 292 %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
293 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ## 293 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ##
294 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 %mask) 294 %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
295 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5 295 %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
296 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ## 296 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ##
297 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 %mask) 297 %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
298 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6 298 %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
299 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ## 299 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ##
300 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 %mask) 300 %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
301 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7 301 %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
302 ret <8 x i32> %vec7 302 ret <8 x i32> %vec7
303 } 303 }
304 304
305 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i8, i32) nounwind readnone 305 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
306 306
307 ; CHECK-LABEL: test_x86_mask_blend_b_256 307 declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly
308 ; CHECK: vpblendmb 308
309 define <32 x i8> @test_x86_mask_blend_b_256(i32 %a0, <32 x i8> %a1, <32 x i8> %a2) { 309 ; CHECK-LABEL: test_x86_mask_blend_w_512
310 %res = call <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8> %a1, <32 x i8> %a2, i32 %a0) ; <<32 x i8>> [#uses=1] 310 define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
311 ret <32 x i8> %res
312 }
313 declare <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8>, <32 x i8>, i32) nounwind readonly
314
315 ; CHECK-LABEL: test_x86_mask_blend_w_256
316 define <16 x i16> @test_x86_mask_blend_w_256(i16 %mask, <16 x i16> %a1, <16 x i16> %a2) {
317 ; CHECK: vpblendmw 311 ; CHECK: vpblendmw
318 %res = call <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16> %a1, <16 x i16> %a2, i16 %mask) ; <<16 x i16>> [#uses=1] 312 %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
319 ret <16 x i16> %res 313 ret <32 x i16> %res
320 } 314 }
321 declare <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16>, <16 x i16>, i16) nounwind readonly 315 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly
322 316
323 ; CHECK-LABEL: test_x86_mask_blend_b_512 317 ; CHECK-LABEL: test_x86_mask_blend_b_512
324 ; CHECK: vpblendmb 318 ; CHECK: vpblendmb
325 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) { 319 define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
326 %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1] 320 %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
327 ret <64 x i8> %res 321 ret <64 x i8> %res
328 } 322 }
329 declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly 323
330 324 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
331 ; CHECK-LABEL: test_x86_mask_blend_w_512 325 ;CHECK-LABEL: test_mask_packs_epi32_rr_512
332 define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) { 326 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
333 ; CHECK: vpblendmw 327 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
334 %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1] 328 ret <32 x i16> %res
335 ret <32 x i16> %res 329 }
336 } 330
337 declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly 331 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
338 332 ;CHECK-LABEL: test_mask_packs_epi32_rrk_512
339 ; CHECK-LABEL: test_x86_mask_blend_b_128 333 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
340 ; CHECK: vpblendmb 334 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
341 define <16 x i8> @test_x86_mask_blend_b_128(i16 %a0, <16 x i8> %a1, <16 x i8> %a2) { 335 ret <32 x i16> %res
342 %res = call <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8> %a1, <16 x i8> %a2, i16 %a0) ; <<16 x i8>> [#uses=1] 336 }
343 ret <16 x i8> %res 337
344 } 338 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
345 declare <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8>, <16 x i8>, i16) nounwind readonly 339 ;CHECK-LABEL: test_mask_packs_epi32_rrkz_512
346 340 ;CHECK: vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
347 ; CHECK-LABEL: test_x86_mask_blend_w_128 341 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
348 define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %a2) { 342 ret <32 x i16> %res
349 ; CHECK: vpblendmw 343 }
350 %res = call <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16> %a1, <8 x i16> %a2, i8 %mask) ; <<8 x i16>> [#uses=1] 344
351 ret <8 x i16> %res 345 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
352 } 346 ;CHECK-LABEL: test_mask_packs_epi32_rm_512
353 declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly 347 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
348 %b = load <16 x i32>, <16 x i32>* %ptr_b
349 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
350 ret <32 x i16> %res
351 }
352
353 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
354 ;CHECK-LABEL: test_mask_packs_epi32_rmk_512
355 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
356 %b = load <16 x i32>, <16 x i32>* %ptr_b
357 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
358 ret <32 x i16> %res
359 }
360
361 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
362 ;CHECK-LABEL: test_mask_packs_epi32_rmkz_512
363 ;CHECK: vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
364 %b = load <16 x i32>, <16 x i32>* %ptr_b
365 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
366 ret <32 x i16> %res
367 }
368
369 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
370 ;CHECK-LABEL: test_mask_packs_epi32_rmb_512
371 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
372 %q = load i32, i32* %ptr_b
373 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
374 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
375 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
376 ret <32 x i16> %res
377 }
378
379 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
380 ;CHECK-LABEL: test_mask_packs_epi32_rmbk_512
381 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
382 %q = load i32, i32* %ptr_b
383 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
384 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
385 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
386 ret <32 x i16> %res
387 }
388
389 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
390 ;CHECK-LABEL: test_mask_packs_epi32_rmbkz_512
391 ;CHECK: vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
392 %q = load i32, i32* %ptr_b
393 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
394 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
395 %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
396 ret <32 x i16> %res
397 }
398
399 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
400
401 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
402 ;CHECK-LABEL: test_mask_packs_epi16_rr_512
403 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0xc1]
404 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
405 ret <64 x i8> %res
406 }
407
408 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
409 ;CHECK-LABEL: test_mask_packs_epi16_rrk_512
410 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0xd1]
411 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
412 ret <64 x i8> %res
413 }
414
415 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
416 ;CHECK-LABEL: test_mask_packs_epi16_rrkz_512
417 ;CHECK: vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0xc1]
418 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
419 ret <64 x i8> %res
420 }
421
422 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
423 ;CHECK-LABEL: test_mask_packs_epi16_rm_512
424 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x63,0x07]
425 %b = load <32 x i16>, <32 x i16>* %ptr_b
426 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
427 ret <64 x i8> %res
428 }
429
430 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
431 ;CHECK-LABEL: test_mask_packs_epi16_rmk_512
432 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x63,0x0f]
433 %b = load <32 x i16>, <32 x i16>* %ptr_b
434 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
435 ret <64 x i8> %res
436 }
437
438 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
439 ;CHECK-LABEL: test_mask_packs_epi16_rmkz_512
440 ;CHECK: vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x63,0x07]
441 %b = load <32 x i16>, <32 x i16>* %ptr_b
442 %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
443 ret <64 x i8> %res
444 }
445
446 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
447
448
449 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
450 ;CHECK-LABEL: test_mask_packus_epi32_rr_512
451 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0
452 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
453 ret <32 x i16> %res
454 }
455
456 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
457 ;CHECK-LABEL: test_mask_packus_epi32_rrk_512
458 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm2 {%k1}
459 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
460 ret <32 x i16> %res
461 }
462
463 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
464 ;CHECK-LABEL: test_mask_packus_epi32_rrkz_512
465 ;CHECK: vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z}
466 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
467 ret <32 x i16> %res
468 }
469
470 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
471 ;CHECK-LABEL: test_mask_packus_epi32_rm_512
472 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0
473 %b = load <16 x i32>, <16 x i32>* %ptr_b
474 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
475 ret <32 x i16> %res
476 }
477
478 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
479 ;CHECK-LABEL: test_mask_packus_epi32_rmk_512
480 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm1 {%k1}
481 %b = load <16 x i32>, <16 x i32>* %ptr_b
482 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
483 ret <32 x i16> %res
484 }
485
486 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
487 ;CHECK-LABEL: test_mask_packus_epi32_rmkz_512
488 ;CHECK: vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z}
489 %b = load <16 x i32>, <16 x i32>* %ptr_b
490 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
491 ret <32 x i16> %res
492 }
493
494 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
495 ;CHECK-LABEL: test_mask_packus_epi32_rmb_512
496 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0
497 %q = load i32, i32* %ptr_b
498 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
499 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
500 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
501 ret <32 x i16> %res
502 }
503
504 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
505 ;CHECK-LABEL: test_mask_packus_epi32_rmbk_512
506 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1}
507 %q = load i32, i32* %ptr_b
508 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
509 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
510 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
511 ret <32 x i16> %res
512 }
513
514 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
515 ;CHECK-LABEL: test_mask_packus_epi32_rmbkz_512
516 ;CHECK: vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z}
517 %q = load i32, i32* %ptr_b
518 %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
519 %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
520 %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
521 ret <32 x i16> %res
522 }
523
524 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
525
526 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
527 ;CHECK-LABEL: test_mask_packus_epi16_rr_512
528 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0
529 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
530 ret <64 x i8> %res
531 }
532
533 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
534 ;CHECK-LABEL: test_mask_packus_epi16_rrk_512
535 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm2 {%k1}
536 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
537 ret <64 x i8> %res
538 }
539
540 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
541 ;CHECK-LABEL: test_mask_packus_epi16_rrkz_512
542 ;CHECK: vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z}
543 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
544 ret <64 x i8> %res
545 }
546
547 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
548 ;CHECK-LABEL: test_mask_packus_epi16_rm_512
549 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0
550 %b = load <32 x i16>, <32 x i16>* %ptr_b
551 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
552 ret <64 x i8> %res
553 }
554
555 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
556 ;CHECK-LABEL: test_mask_packus_epi16_rmk_512
557 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm1 {%k1}
558 %b = load <32 x i16>, <32 x i16>* %ptr_b
559 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
560 ret <64 x i8> %res
561 }
562
563 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
564 ;CHECK-LABEL: test_mask_packus_epi16_rmkz_512
565 ;CHECK: vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z}
566 %b = load <32 x i16>, <32 x i16>* %ptr_b
567 %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
568 ret <64 x i8> %res
569 }
570
571 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
572
573 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
574 ;CHECK-LABEL: test_mask_adds_epi16_rr_512
575 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0
576 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
577 ret <32 x i16> %res
578 }
579
580 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
581 ;CHECK-LABEL: test_mask_adds_epi16_rrk_512
582 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm2 {%k1}
583 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
584 ret <32 x i16> %res
585 }
586
587 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
588 ;CHECK-LABEL: test_mask_adds_epi16_rrkz_512
589 ;CHECK: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z}
590 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
591 ret <32 x i16> %res
592 }
593
594 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
595 ;CHECK-LABEL: test_mask_adds_epi16_rm_512
596 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0
597 %b = load <32 x i16>, <32 x i16>* %ptr_b
598 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
599 ret <32 x i16> %res
600 }
601
602 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
603 ;CHECK-LABEL: test_mask_adds_epi16_rmk_512
604 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm1 {%k1}
605 %b = load <32 x i16>, <32 x i16>* %ptr_b
606 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
607 ret <32 x i16> %res
608 }
609
610 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
611 ;CHECK-LABEL: test_mask_adds_epi16_rmkz_512
612 ;CHECK: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z}
613 %b = load <32 x i16>, <32 x i16>* %ptr_b
614 %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
615 ret <32 x i16> %res
616 }
617
618 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
619
620 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
621 ;CHECK-LABEL: test_mask_subs_epi16_rr_512
622 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0
623 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
624 ret <32 x i16> %res
625 }
626
627 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
628 ;CHECK-LABEL: test_mask_subs_epi16_rrk_512
629 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm2 {%k1}
630 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
631 ret <32 x i16> %res
632 }
633
634 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
635 ;CHECK-LABEL: test_mask_subs_epi16_rrkz_512
636 ;CHECK: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z}
637 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
638 ret <32 x i16> %res
639 }
640
641 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
642 ;CHECK-LABEL: test_mask_subs_epi16_rm_512
643 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0
644 %b = load <32 x i16>, <32 x i16>* %ptr_b
645 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
646 ret <32 x i16> %res
647 }
648
649 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
650 ;CHECK-LABEL: test_mask_subs_epi16_rmk_512
651 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm1 {%k1}
652 %b = load <32 x i16>, <32 x i16>* %ptr_b
653 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
654 ret <32 x i16> %res
655 }
656
657 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
658 ;CHECK-LABEL: test_mask_subs_epi16_rmkz_512
659 ;CHECK: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z}
660 %b = load <32 x i16>, <32 x i16>* %ptr_b
661 %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
662 ret <32 x i16> %res
663 }
664
665 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
666
667 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
668 ;CHECK-LABEL: test_mask_adds_epu16_rr_512
669 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0
670 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
671 ret <32 x i16> %res
672 }
673
674 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
675 ;CHECK-LABEL: test_mask_adds_epu16_rrk_512
676 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm2 {%k1}
677 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
678 ret <32 x i16> %res
679 }
680
681 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
682 ;CHECK-LABEL: test_mask_adds_epu16_rrkz_512
683 ;CHECK: vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z}
684 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
685 ret <32 x i16> %res
686 }
687
688 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
689 ;CHECK-LABEL: test_mask_adds_epu16_rm_512
690 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0
691 %b = load <32 x i16>, <32 x i16>* %ptr_b
692 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
693 ret <32 x i16> %res
694 }
695
696 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
697 ;CHECK-LABEL: test_mask_adds_epu16_rmk_512
698 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm1 {%k1}
699 %b = load <32 x i16>, <32 x i16>* %ptr_b
700 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
701 ret <32 x i16> %res
702 }
703
704 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
705 ;CHECK-LABEL: test_mask_adds_epu16_rmkz_512
706 ;CHECK: vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z}
707 %b = load <32 x i16>, <32 x i16>* %ptr_b
708 %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
709 ret <32 x i16> %res
710 }
711
712 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
713
714 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
715 ;CHECK-LABEL: test_mask_subs_epu16_rr_512
716 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0
717 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
718 ret <32 x i16> %res
719 }
720
721 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
722 ;CHECK-LABEL: test_mask_subs_epu16_rrk_512
723 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm2 {%k1}
724 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
725 ret <32 x i16> %res
726 }
727
728 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
729 ;CHECK-LABEL: test_mask_subs_epu16_rrkz_512
730 ;CHECK: vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z}
731 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
732 ret <32 x i16> %res
733 }
734
735 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
736 ;CHECK-LABEL: test_mask_subs_epu16_rm_512
737 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0
738 %b = load <32 x i16>, <32 x i16>* %ptr_b
739 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
740 ret <32 x i16> %res
741 }
742
743 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
744 ;CHECK-LABEL: test_mask_subs_epu16_rmk_512
745 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm1 {%k1}
746 %b = load <32 x i16>, <32 x i16>* %ptr_b
747 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
748 ret <32 x i16> %res
749 }
750
751 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
752 ;CHECK-LABEL: test_mask_subs_epu16_rmkz_512
753 ;CHECK: vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z}
754 %b = load <32 x i16>, <32 x i16>* %ptr_b
755 %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
756 ret <32 x i16> %res
757 }
758
759 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
760
761 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
762
763 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_b_512
764 ; CHECK-NOT: call
765 ; CHECK: vpmaxsb %zmm
766 ; CHECK: {%k1}
767 define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
768 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
769 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
770 %res2 = add <64 x i8> %res, %res1
771 ret <64 x i8> %res2
772 }
773
774 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
775
776 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_w_512
777 ; CHECK-NOT: call
778 ; CHECK: vpmaxsw %zmm
779 ; CHECK: {%k1}
780 define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
781 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
782 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
783 %res2 = add <32 x i16> %res, %res1
784 ret <32 x i16> %res2
785 }
786
787 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
788
789 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_b_512
790 ; CHECK-NOT: call
791 ; CHECK: vpmaxub %zmm
792 ; CHECK: {%k1}
793 define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
794 %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
795 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
796 %res2 = add <64 x i8> %res, %res1
797 ret <64 x i8> %res2
798 }
799
800 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
801
802 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_w_512
803 ; CHECK-NOT: call
804 ; CHECK: vpmaxuw %zmm
805 ; CHECK: {%k1}
806 define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
807 %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
808 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
809 %res2 = add <32 x i16> %res, %res1
810 ret <32 x i16> %res2
811 }
812
813 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
814
815 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_b_512
816 ; CHECK-NOT: call
817 ; CHECK: vpminsb %zmm
818 ; CHECK: {%k1}
819 define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
820 %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
821 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
822 %res2 = add <64 x i8> %res, %res1
823 ret <64 x i8> %res2
824 }
825
826 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
827
828 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_w_512
829 ; CHECK-NOT: call
830 ; CHECK: vpminsw %zmm
831 ; CHECK: {%k1}
832 define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
833 %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
834 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
835 %res2 = add <32 x i16> %res, %res1
836 ret <32 x i16> %res2
837 }
838
839 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
840
841 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_b_512
842 ; CHECK-NOT: call
843 ; CHECK: vpminub %zmm
844 ; CHECK: {%k1}
845 define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
846 %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
847 %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
848 %res2 = add <64 x i8> %res, %res1
849 ret <64 x i8> %res2
850 }
851
852 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
853
854 ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_w_512
855 ; CHECK-NOT: call
856 ; CHECK: vpminuw %zmm
857 ; CHECK: {%k1}
858 define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
859 %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
860 %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
861 %res2 = add <32 x i16> %res, %res1
862 ret <32 x i16> %res2
863 }
864
865 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
866
867 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512
868 ; CHECK-NOT: call
869 ; CHECK: kmov
870 ; CHECK: vpermt2w %zmm{{.*}}{%k1}
871 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
872 %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
873 %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
874 %res2 = add <32 x i16> %res, %res1
875 ret <32 x i16> %res2
876 }
877
878 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
879
880 ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512
881 ; CHECK-NOT: call
882 ; CHECK: kmov
883 ; CHECK: vpermt2w %zmm{{.*}}{%k1} {z}
884 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
885 %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
886 %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
887 %res2 = add <32 x i16> %res, %res1
888 ret <32 x i16> %res2
889 }
890
891 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
892
893 ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512
894 ; CHECK-NOT: call
895 ; CHECK: kmov
896 ; CHECK: vpermi2w %zmm{{.*}}{%k1}
; Invokes mask.vpermi2var.hi.512 masked (%x3) and unmasked (-1); the add
; keeps both calls live.
define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
  %masked = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %plain = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %both = add <32 x i16> %masked, %plain
  ret <32 x i16> %both
}
903
904 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
905
906 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_b_512
907 ; CHECK-NOT: call
908 ; CHECK: vpavgb %zmm
909 ; CHECK: {%k1}
; Invokes mask.pavg.b.512 with the caller's i64 byte mask (%x3) and fully
; unmasked (-1); the add keeps both calls live.
define <64 x i8>@test_int_x86_avx512_mask_pavg_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %plain = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %both = add <64 x i8> %masked, %plain
  ret <64 x i8> %both
}
916
917 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
918
919 ; CHECK-LABEL: @test_int_x86_avx512_mask_pavg_w_512
920 ; CHECK-NOT: call
921 ; CHECK: vpavgw %zmm
922 ; CHECK: {%k1}
; Invokes mask.pavg.w.512 with the caller's i32 word mask (%x3) and fully
; unmasked (-1); the add keeps both calls live.
define <32 x i16>@test_int_x86_avx512_mask_pavg_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %plain = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %both = add <32 x i16> %masked, %plain
  ret <32 x i16> %both
}
929
930 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
931
932 ; CHECK-LABEL: @test_int_x86_avx512_mask_pshuf_b_512
933 ; CHECK-NOT: call
934 ; CHECK: kmov
935 ; CHECK: vpshufb %zmm{{.*}}{%k1}
; Invokes mask.pshuf.b.512 masked (%x3) and unmasked (-1); the add keeps
; both calls live.
define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %plain = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %both = add <64 x i8> %masked, %plain
  ret <64 x i8> %both
}
942
943 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
944
945 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_w_512
946 ; CHECK-NOT: call
947 ; CHECK: kmov
948 ; CHECK: vpabsw{{.*}}{%k1}
; Invokes mask.pabs.w.512 (pass-through operand %x1) masked (%x2) and
; unmasked (-1); the add keeps both calls live.
define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
  %plain = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
  %both = add <32 x i16> %masked, %plain
  ret <32 x i16> %both
}
955
956 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
957
958 ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_b_512
959 ; CHECK-NOT: call
960 ; CHECK: kmov
961 ; CHECK: vpabsb{{.*}}{%k1}
; Invokes mask.pabs.b.512 (pass-through operand %x1) masked (%x2) and
; unmasked (-1); the add keeps both calls live.
define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
  %masked = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
  %plain = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
  %both = add <64 x i8> %masked, %plain
  ret <64 x i8> %both
}
968
969 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
970
971 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhu_w_512
972 ; CHECK-NOT: call
973 ; CHECK: kmov
974 ; CHECK: {%k1}
975 ; CHECK: vpmulhuw {{.*}}encoding: [0x62
; Invokes mask.pmulhu.w.512 masked (%x3) and unmasked (-1); the add keeps
; both calls live (the CHECK lines above also pin the EVEX 0x62 encoding).
define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %plain = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %both = add <32 x i16> %masked, %plain
  ret <32 x i16> %both
}
982
983 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
984
985 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulh_w_512
986 ; CHECK-NOT: call
987 ; CHECK: kmov
988 ; CHECK: {%k1}
989 ; CHECK: vpmulhw {{.*}}encoding: [0x62
; Invokes mask.pmulh.w.512 masked (%x3) and unmasked (-1); the add keeps
; both calls live (the CHECK lines above also pin the EVEX 0x62 encoding).
define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %plain = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %both = add <32 x i16> %masked, %plain
  ret <32 x i16> %both
}
996
997 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
998
999 ; CHECK-LABEL: @test_int_x86_avx512_mask_pmulhr_sw_512
1000 ; CHECK-NOT: call
1001 ; CHECK: kmov
1002 ; CHECK: {%k1}
1003 ; CHECK: vpmulhrsw {{.*}}encoding: [0x62
; Invokes mask.pmul.hr.sw.512 masked (%x3) and unmasked (-1); the add keeps
; both calls live (the CHECK lines above also pin the EVEX 0x62 encoding).
define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
  %masked = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %plain = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %both = add <32 x i16> %masked, %plain
  ret <32 x i16> %both
}
1010
1011 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
1012
; Narrowing move <32 x i16> -> <32 x i8>: exercises the unmasked,
; merge-masked, and zero-masked forms; the adds keep all three calls live.
define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; CHECK: vpmovwb %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovwb %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovwb %zmm0, %ymm0
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}
1025
1026 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1027
; Memory form of the wb narrowing move: unmasked store then masked store
; through %ptr.
define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; CHECK: vpmovwb %zmm0, (%rdi)
; CHECK: vpmovwb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}
1036
1037 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
1038
; Signed-saturating narrowing move <32 x i16> -> <32 x i8>: unmasked,
; merge-masked, and zero-masked forms; the adds keep all three calls live.
define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; CHECK: vpmovswb %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovswb %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovswb %zmm0, %ymm0
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}
1051
1052 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1053
; Memory form of the signed-saturating narrowing move: unmasked store then
; masked store through %ptr.
define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; CHECK: vpmovswb %zmm0, (%rdi)
; CHECK: vpmovswb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}
1062
1063 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
1064
; Unsigned-saturating narrowing move <32 x i16> -> <32 x i8>: unmasked,
; merge-masked, and zero-masked forms; the adds keep all three calls live.
define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; CHECK: vpmovuswb %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vpmovuswb %zmm0, %ymm2 {%k1} {z}
; CHECK-NEXT: vpmovuswb %zmm0, %ymm0
  %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
  %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
  %res3 = add <32 x i8> %res0, %res1
  %res4 = add <32 x i8> %res3, %res2
  ret <32 x i8> %res4
}
1077
1078 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
1079
; Memory form of the unsigned-saturating narrowing move: unmasked store then
; masked store through %ptr.
define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; CHECK: vpmovuswb %zmm0, (%rdi)
; CHECK: vpmovuswb %zmm0, (%rdi) {%k1}
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
  call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
  ret void
}
1088
1089 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
1090
; mask.pmaddubs.w.512 masked (%x3) and unmasked (-1); the add keeps both
; calls live. CHECK-NEXT chain pins the exact instruction sequence.
define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmaddubsw %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
1104
1105 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
1106
; mask.pmaddw.d.512 masked (%x3, i16 dword mask) and unmasked (-1); the add
; keeps both calls live. CHECK-NEXT chain pins the exact instruction sequence.
define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
1120
1121 declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1122
; mask.punpckhb.w.512 masked (%x3, i64 byte mask) and unmasked (-1); the add
; keeps both calls live. CHECK-NEXT chain pins the exact instruction sequence.
define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %res2 = add <64 x i8> %res, %res1
  ret <64 x i8> %res2
}
1136
1137 declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
1138
; mask.punpcklb.w.512 masked (%x3, i64 byte mask) and unmasked (-1); the add
; keeps both calls live. CHECK-NEXT chain pins the exact instruction sequence.
define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
  %res2 = add <64 x i8> %res, %res1
  ret <64 x i8> %res2
}
1152
1153 declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1154
; mask.punpckhw.d.512 masked (%x3, i32 word mask) and unmasked (-1); the add
; keeps both calls live. CHECK-NEXT chain pins the exact instruction sequence.
define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
1168
1169 declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
1170
; mask.punpcklw.d.512 masked (%x3, i32 word mask) and unmasked (-1); the add
; keeps both calls live. CHECK-NEXT chain pins the exact instruction sequence.
define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
1184
1185 declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
1186
; mask.palignr.512 with immediate 2: merge-masked, zero-masked, and unmasked
; forms; the adds keep all three calls live. CHECK-NEXT chain pins the exact
; instruction sequence.
define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_palignr_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovq %rdi, %k1
; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vpalignr $2, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
  %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
  %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
  %res3 = add <64 x i8> %res, %res1
  %res4 = add <64 x i8> %res3, %res2
  ret <64 x i8> %res4
}
1204
1205 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
1206
; mask.dbpsadbw.512 with immediate 2: merge-masked, zero-masked, and unmasked
; forms; the adds keep all three calls live. CHECK-NEXT chain pins the exact
; instruction sequence.
define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovd %edi, %k1
; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm3 {%k1} {z}
; CHECK-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vpaddw %zmm3, %zmm2, %zmm1
; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}
1224
1225 declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
1226
1227 ; CHECK-LABEL: @test_int_x86_avx512_mask_psll_dq_512
1228 ; CHECK-NOT: call
1229 ; CHECK: vpslldq
1230 ; CHECK: vpslldq
; Whole-register byte shift left (vpslldq) by immediates 8 and 4; the add
; keeps both calls live. NOTE(review): despite the "mask" in the test name,
; psll.dq.512 takes no mask operand.
define <8 x i64>@test_int_x86_avx512_mask_psll_dq_512(<8 x i64> %x0) {
  %shift8 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
  %shift4 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
  %sum = add <8 x i64> %shift8, %shift4
  ret <8 x i64> %sum
}
1237
1238 declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
1239
1240 ; CHECK-LABEL: @test_int_x86_avx512_mask_psrl_dq_512
1241 ; CHECK-NOT: call
1242 ; CHECK: vpsrldq
1243 ; CHECK: vpsrldq
; Whole-register byte shift right (vpsrldq) by immediates 8 and 4; the add
; keeps both calls live. NOTE(review): despite the "mask" in the test name,
; psrl.dq.512 takes no mask operand.
define <8 x i64>@test_int_x86_avx512_mask_psrl_dq_512(<8 x i64> %x0) {
  %shift8 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
  %shift4 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
  %sum = add <8 x i64> %shift8, %shift4
  ret <8 x i64> %sum
}
1250 declare <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
1251
1252 ; CHECK-LABEL: @test_int_x86_avx512_mask_psadb_w_512
1253 ; CHECK-NOT: call
1254 ; CHECK: vpsadbw %zmm1
1255 ; CHECK: vpsadbw %zmm2
; Sum-of-absolute-differences against two different second operands; the add
; keeps both calls live. NOTE(review): vpsadbw accumulates into 64-bit lanes,
; so the <64 x i8> result type declared here looks suspicious -- verify
; against the intrinsic definition in IntrinsicsX86.td for this revision.
define <64 x i8>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
  %sad01 = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
  %sad02 = call <64 x i8> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
  %sum = add <64 x i8> %sad01, %sad02
  ret <64 x i8> %sum
}