150
|
1 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_70 \
|
|
2 // RUN: -fcuda-is-device -target-feature +ptx60 \
|
|
3 // RUN: -S -emit-llvm -o - -x cuda %s \
|
|
4 // RUN: | FileCheck -check-prefix=CHECK_M16 %s
|
|
5 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_70 \
|
|
6 // RUN: -fcuda-is-device -target-feature +ptx61 -DPTX61 \
|
|
7 // RUN: -S -emit-llvm -o - -x cuda %s \
|
|
8 // RUN: | FileCheck -check-prefixes=CHECK_M16,CHECK_M32_M8 %s
|
207
|
9 // Make sure builtins still work with the latest combination of GPU & PTX.
|
|
10 // RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_86 \
|
|
11 // RUN: -fcuda-is-device -target-feature +ptx72 -DPTX61 \
|
|
12 // RUN: -S -emit-llvm -o - -x cuda %s \
|
|
13 // RUN: | FileCheck -check-prefixes=CHECK_M16,CHECK_M32_M8 %s
|
150
|
14 // RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \
|
|
15 // RUN: -DPTX61 -fcuda-is-device -S -o /dev/null -x cuda -verify=pre-sm_70 %s
|
|
16 // RUN: %clang_cc1 -triple nvptx-unknown-unknown \
|
|
17 // RUN: -target-cpu sm_70 -target-feature +ptx60 \
|
|
18 // RUN: -DPTX61 -fcuda-is-device -S -o /dev/null -x cuda -verify=pre-ptx61 %s
|
|
19
|
|
20 #if !defined(CUDA_VERSION)
|
|
21 #define __device__ __attribute__((device))
|
|
22 #define __global__ __attribute__((global))
|
|
23 #define __shared__ __attribute__((shared))
|
|
24 #define __constant__ __attribute__((constant))
|
|
25
|
|
26 typedef unsigned long long uint64_t;
|
|
27 #endif
|
|
28 // We have to keep all builtins that depend on particular target feature in the
|
|
29 // same function, because the codegen will stop after the very first function
|
|
30 // that encounters an error, so -verify will not be able to find errors in
|
|
31 // subsequent functions.
|
|
32
|
|
33 // CHECK-LABEL: nvvm_wmma_m16n16k16
|
|
34 __device__ void nvvm_wmma_m16n16k16(int *src, int *dst,
|
|
35 float *fsrc, float *fdst,
|
|
36 int ldm) {
|
|
37 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16
|
207
|
38 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
39 __hmma_m16n16k16_ld_a(dst, src, ldm, 0);
|
|
40 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16
|
207
|
41 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_a' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
42 __hmma_m16n16k16_ld_a(dst, src+1, ldm, 1);
|
|
43
|
|
44 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16
|
207
|
45 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
46 __hmma_m16n16k16_ld_b(dst, src, ldm, 0);
|
|
47 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16
|
207
|
48 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_b' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
49 __hmma_m16n16k16_ld_b(dst, src+2, ldm, 1);
|
|
50
|
|
51 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16
|
207
|
52 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
53 __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0);
|
|
54 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16
|
207
|
55 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
56 __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1);
|
|
57
|
|
58 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32
|
207
|
59 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
60 __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0);
|
|
61 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32
|
207
|
62 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
63 __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1);
|
|
64
|
|
65 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16
|
207
|
66 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
67 __hmma_m16n16k16_st_c_f16(dst, src, ldm, 0);
|
|
68 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16
|
207
|
69 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
70 __hmma_m16n16k16_st_c_f16(dst, src, ldm, 1);
|
|
71
|
|
72 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32
|
207
|
73 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
74 __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0);
|
|
75 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32
|
207
|
76 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
77 __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1);
|
|
78
|
|
79 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16
|
207
|
80 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
81 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0);
|
|
82 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite
|
207
|
83 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
84 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1);
|
|
85 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16
|
207
|
86 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
87 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0);
|
|
88 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite
|
207
|
89 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
90 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1);
|
|
91 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16
|
207
|
92 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
93 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0);
|
|
94 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite
|
207
|
95 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
96 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1);
|
|
97 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16
|
207
|
98 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
99 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0);
|
|
100 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite
|
207
|
101 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
102 __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1);
|
|
103
|
|
104 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32
|
207
|
105 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
106 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
|
|
107 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite
|
207
|
108 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
109 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
|
|
110 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32
|
207
|
111 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
112 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
|
|
113 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite
|
207
|
114 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
115 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
|
|
116 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32
|
207
|
117 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
118 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
|
|
119 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite
|
207
|
120 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
121 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
|
|
122 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32
|
207
|
123 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
124 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
|
|
125 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite
|
207
|
126 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
127 __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
|
|
128
|
|
129 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16
|
207
|
130 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
131 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0);
|
|
132 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite
|
207
|
133 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
134 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1);
|
|
135 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16
|
207
|
136 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
137 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0);
|
|
138 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite
|
207
|
139 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
140 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1);
|
|
141 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16
|
207
|
142 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
143 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0);
|
|
144 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite
|
207
|
145 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
146 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1);
|
|
147 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16
|
207
|
148 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
149 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0);
|
|
150 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite
|
207
|
151 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
152 __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1);
|
|
153
|
|
154 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32
|
207
|
155 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
156 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
|
|
157 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite
|
207
|
158 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
159 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
|
|
160 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32
|
207
|
161 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
162 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
|
|
163 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite
|
207
|
164 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
165 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
|
|
166 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32
|
207
|
167 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
168 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
|
|
169 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite
|
207
|
170 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
171 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
|
|
172 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32
|
207
|
173 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
174 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
|
|
175 // CHECK_M16: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite
|
207
|
176 // pre-sm_70-error-re@+1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx60{{.*}}}}
|
150
|
177 __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
|
|
178 }
|
|
179
|
|
180 #ifdef PTX61
|
|
181 // CHECK-LABEL: nvvm_wmma_m32n8k16
|
|
182 __device__ void nvvm_wmma_m32n8k16(int *src, int *dst,
|
|
183 float *fsrc, float *fdst,
|
|
184 int ldm) {
|
|
185 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.row.stride.f16
|
207
|
186 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
187 __hmma_m32n8k16_ld_a(dst, src, ldm, 0);
|
|
188 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.a.col.stride.f16
|
207
|
189 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
190 __hmma_m32n8k16_ld_a(dst, src+1, ldm, 1);
|
|
191
|
|
192 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.row.stride.f16
|
207
|
193 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
194 __hmma_m32n8k16_ld_b(dst, src, ldm, 0);
|
|
195 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.b.col.stride.f16
|
207
|
196 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
197 __hmma_m32n8k16_ld_b(dst, src+2, ldm, 1);
|
|
198
|
|
199 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f16
|
207
|
200 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
201 __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 0);
|
|
202 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f16
|
207
|
203 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
204 __hmma_m32n8k16_ld_c_f16(dst, src, ldm, 1);
|
|
205
|
|
206 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.row.stride.f32
|
207
|
207 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
208 __hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 0);
|
|
209 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.load.c.col.stride.f32
|
207
|
210 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
211 __hmma_m32n8k16_ld_c_f32(fdst, fsrc, ldm, 1);
|
|
212
|
|
213 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f16
|
207
|
214 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
215 __hmma_m32n8k16_st_c_f16(dst, src, ldm, 0);
|
|
216 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f16
|
207
|
217 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
218 __hmma_m32n8k16_st_c_f16(dst, src, ldm, 1);
|
|
219
|
|
220 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.row.stride.f32
|
207
|
221 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
222 __hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 0);
|
|
223 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.store.d.col.stride.f32
|
207
|
224 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
225 __hmma_m32n8k16_st_c_f32(fdst, fsrc, ldm, 1);
|
|
226
|
|
227 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16
|
207
|
228 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
229 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 0);
|
|
230 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f16.satfinite
|
207
|
231 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
232 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 0, 1);
|
|
233 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16
|
207
|
234 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
235 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 0);
|
|
236 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f16.satfinite
|
207
|
237 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
238 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 1, 1);
|
|
239 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16
|
207
|
240 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
241 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 0);
|
|
242 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f16.satfinite
|
207
|
243 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
244 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 2, 1);
|
|
245 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16
|
207
|
246 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
247 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 0);
|
|
248 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f16.satfinite
|
207
|
249 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
250 __hmma_m32n8k16_mma_f16f16(dst, src, src, src, 3, 1);
|
|
251
|
|
252 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32
|
207
|
253 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
254 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
|
|
255 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f16.f32.satfinite
|
207
|
256 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
257 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
|
|
258 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32
|
207
|
259 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
260 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
|
|
261 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f16.f32.satfinite
|
207
|
262 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
263 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
|
|
264 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32
|
207
|
265 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
266 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
|
|
267 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f16.f32.satfinite
|
207
|
268 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
269 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
|
|
270 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32
|
207
|
271 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
272 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
|
|
273 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f16.f32.satfinite
|
207
|
274 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
275 __hmma_m32n8k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
|
|
276
|
|
277 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16
|
207
|
278 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
279 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 0);
|
|
280 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f16.satfinite
|
207
|
281 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
282 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 0, 1);
|
|
283 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16
|
207
|
284 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
285 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 0);
|
|
286 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f16.satfinite
|
207
|
287 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
288 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 1, 1);
|
|
289 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16
|
207
|
290 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
291 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 0);
|
|
292 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f16.satfinite
|
207
|
293 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
294 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 2, 1);
|
|
295 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16
|
207
|
296 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
297 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 0);
|
|
298 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f16.satfinite
|
207
|
299 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
300 __hmma_m32n8k16_mma_f32f16(fdst, src, src, src, 3, 1);
|
|
301
|
|
302 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32
|
207
|
303 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
304 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
|
|
305 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.row.f32.f32.satfinite
|
207
|
306 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
307 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
|
|
308 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32
|
207
|
309 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
310 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
|
|
311 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.row.col.f32.f32.satfinite
|
207
|
312 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
313 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
|
|
314 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32
|
207
|
315 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
316 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
|
|
317 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.row.f32.f32.satfinite
|
207
|
318 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
319 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
|
|
320 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32
|
207
|
321 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
322 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
|
|
323 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m32n8k16.mma.col.col.f32.f32.satfinite
|
207
|
324 // pre-ptx61-error-re@+1 {{'__hmma_m32n8k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
325 __hmma_m32n8k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
|
|
326
|
|
327
|
|
328 // m8n32k16 variants.
|
|
329
|
|
330 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.row.stride.f16
|
207
|
331 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
332 __hmma_m8n32k16_ld_a(dst, src, ldm, 0);
|
|
333 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.a.col.stride.f16
|
207
|
334 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_a' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
335 __hmma_m8n32k16_ld_a(dst, src+1, ldm, 1);
|
|
336
|
|
337 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.row.stride.f16
|
207
|
338 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
339 __hmma_m8n32k16_ld_b(dst, src, ldm, 0);
|
|
340 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.b.col.stride.f16
|
207
|
341 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_b' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
342 __hmma_m8n32k16_ld_b(dst, src+2, ldm, 1);
|
|
343
|
|
344 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f16
|
207
|
345 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
346 __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 0);
|
|
347 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f16
|
207
|
348 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
349 __hmma_m8n32k16_ld_c_f16(dst, src, ldm, 1);
|
|
350
|
|
351 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.row.stride.f32
|
207
|
352 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
353 __hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 0);
|
|
354 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.load.c.col.stride.f32
|
207
|
355 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_ld_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
356 __hmma_m8n32k16_ld_c_f32(fdst, fsrc, ldm, 1);
|
|
357
|
|
358 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f16
|
207
|
359 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
360 __hmma_m8n32k16_st_c_f16(dst, src, ldm, 0);
|
|
361 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f16
|
207
|
362 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
363 __hmma_m8n32k16_st_c_f16(dst, src, ldm, 1);
|
|
364
|
|
365 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.row.stride.f32
|
207
|
366 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
367 __hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 0);
|
|
368 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.store.d.col.stride.f32
|
207
|
369 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_st_c_f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
370 __hmma_m8n32k16_st_c_f32(fdst, fsrc, ldm, 1);
|
|
371
|
|
372 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16
|
207
|
373 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
374 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 0);
|
|
375 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f16.satfinite
|
207
|
376 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
377 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 0, 1);
|
|
378 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16
|
207
|
379 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
380 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 0);
|
|
381 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f16.satfinite
|
207
|
382 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
383 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 1, 1);
|
|
384 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16
|
207
|
385 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
386 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 0);
|
|
387 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f16.satfinite
|
207
|
388 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
389 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 2, 1);
|
|
390 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16
|
207
|
391 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
392 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 0);
|
|
393 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f16.satfinite
|
207
|
394 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
395 __hmma_m8n32k16_mma_f16f16(dst, src, src, src, 3, 1);
|
|
396
|
|
397 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32
|
207
|
398 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
399 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
|
|
400 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f16.f32.satfinite
|
207
|
401 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
402 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
|
|
403 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32
|
207
|
404 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
405 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
|
|
406 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f16.f32.satfinite
|
207
|
407 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
408 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
|
|
409 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32
|
207
|
410 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
411 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
|
|
412 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f16.f32.satfinite
|
207
|
413 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
414 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
|
|
415 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32
|
207
|
416 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
417 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
|
|
418 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f16.f32.satfinite
|
207
|
419 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f16f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
420 __hmma_m8n32k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
|
|
421
|
|
422 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16
|
207
|
423 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
424 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 0);
|
|
425 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f16.satfinite
|
207
|
426 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
427 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 0, 1);
|
|
428 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16
|
207
|
429 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
430 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 0);
|
|
431 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f16.satfinite
|
207
|
432 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
433 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 1, 1);
|
|
434 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16
|
207
|
435 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
436 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 0);
|
|
437 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f16.satfinite
|
207
|
438 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
439 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 2, 1);
|
|
440 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16
|
207
|
441 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
442 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 0);
|
|
443 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f16.satfinite
|
207
|
444 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f16' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
445 __hmma_m8n32k16_mma_f32f16(fdst, src, src, src, 3, 1);
|
|
446
|
|
447 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32
|
207
|
448 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
449 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
|
|
450 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.row.f32.f32.satfinite
|
207
|
451 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
452 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
|
|
453 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32
|
207
|
454 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
455 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
|
|
456 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.row.col.f32.f32.satfinite
|
207
|
457 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
458 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
|
|
459 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32
|
207
|
460 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
461 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
|
|
462 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.row.f32.f32.satfinite
|
207
|
463 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
464 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
|
|
465 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32
|
207
|
466 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
467 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
|
|
468 // CHECK_M32_M8: call {{.*}} @llvm.nvvm.wmma.m8n32k16.mma.col.col.f32.f32.satfinite
|
207
|
469 // pre-ptx61-error-re@+1 {{'__hmma_m8n32k16_mma_f32f32' needs target feature (sm_70{{.*}},(ptx61{{.*}}}}
|
150
|
470 __hmma_m8n32k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
|
|
471 }
|
|
472 #endif
|